1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a printer that converts from our internal representation 11 // of machine-dependent LLVM code to NVPTX assembly language. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "NVPTXAsmPrinter.h" 16 #include "InstPrinter/NVPTXInstPrinter.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "NVPTX.h" 19 #include "NVPTXInstrInfo.h" 20 #include "NVPTXMCExpr.h" 21 #include "NVPTXMachineFunctionInfo.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXTargetMachine.h" 24 #include "NVPTXUtilities.h" 25 #include "cl_common_defines.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/Analysis/ConstantFolding.h" 28 #include "llvm/CodeGen/Analysis.h" 29 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineLoopInfo.h" 31 #include "llvm/CodeGen/MachineModuleInfo.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/IR/DebugInfo.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/Function.h" 36 #include "llvm/IR/GlobalVariable.h" 37 #include "llvm/IR/Mangler.h" 38 #include "llvm/IR/Module.h" 39 #include "llvm/IR/Operator.h" 40 #include "llvm/MC/MCInst.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/CommandLine.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/FormattedStream.h" 46 #include "llvm/Support/Path.h" 47 #include "llvm/Support/TargetRegistry.h" 48 #include "llvm/Support/TimeValue.h" 49 #include "llvm/Target/TargetLoweringObjectFile.h" 50 #include "llvm/Transforms/Utils/UnrollLoop.h" 51 #include <sstream> 52 using namespace llvm; 53 54 #define DEPOTNAME "__local_depot" 55 56 static cl::opt<bool> 57 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 58 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 59 cl::init(true)); 60 61 static cl::opt<bool> 62 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 63 cl::desc("NVPTX Specific: Emit source line in ptx file"), 64 cl::init(false)); 65 66 namespace { 67 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 68 /// depends. 69 void DiscoverDependentGlobals(const Value *V, 70 DenseSet<const GlobalVariable *> &Globals) { 71 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 72 Globals.insert(GV); 73 else { 74 if (const User *U = dyn_cast<User>(V)) { 75 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 76 DiscoverDependentGlobals(U->getOperand(i), Globals); 77 } 78 } 79 } 80 } 81 82 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 83 /// instances to be emitted, but only after any dependents have been added 84 /// first. 85 void VisitGlobalVariableForEmission( 86 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 87 DenseSet<const GlobalVariable *> &Visited, 88 DenseSet<const GlobalVariable *> &Visiting) { 89 // Have we already visited this one? 90 if (Visited.count(GV)) 91 return; 92 93 // Do we have a circular dependency? 94 if (!Visiting.insert(GV).second) 95 report_fatal_error("Circular dependency found in global variable set"); 96 97 // Make sure we visit all dependents first 98 DenseSet<const GlobalVariable *> Others; 99 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100 DiscoverDependentGlobals(GV->getOperand(i), Others); 101 102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103 E = Others.end(); 104 I != E; ++I) 105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106 107 // Now we can visit ourself 108 Order.push_back(GV); 109 Visited.insert(GV); 110 Visiting.erase(GV); 111 } 112 } 113 114 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 115 if (!EmitLineNumbers) 116 return; 117 if (ignoreLoc(MI)) 118 return; 119 120 const DebugLoc &curLoc = MI.getDebugLoc(); 121 122 if (!prevDebugLoc && !curLoc) 123 return; 124 125 if (prevDebugLoc == curLoc) 126 return; 127 128 prevDebugLoc = curLoc; 129 130 if (!curLoc) 131 return; 132 133 auto *Scope = cast_or_null<DIScope>(curLoc.getScope()); 134 if (!Scope) 135 return; 136 137 StringRef fileName(Scope->getFilename()); 138 StringRef dirName(Scope->getDirectory()); 139 SmallString<128> FullPathName = dirName; 140 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 141 sys::path::append(FullPathName, fileName); 142 fileName = FullPathName; 143 } 144 145 if (filenameMap.find(fileName) == filenameMap.end()) 146 return; 147 148 // Emit the line from the source file. 149 if (InterleaveSrc) 150 this->emitSrcInText(fileName, curLoc.getLine()); 151 152 std::stringstream temp; 153 temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() 154 << " " << curLoc.getCol(); 155 OutStreamer->EmitRawText(temp.str()); 156 } 157 158 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 159 SmallString<128> Str; 160 raw_svector_ostream OS(Str); 161 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) 162 emitLineNumberAsDotLoc(*MI); 163 164 MCInst Inst; 165 lowerToMCInst(MI, Inst); 166 EmitToStreamer(*OutStreamer, Inst); 167 } 168 169 // Handle symbol backtracking for targets that do not support image handles 170 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 171 unsigned OpNo, MCOperand &MCOp) { 172 const MachineOperand &MO = MI->getOperand(OpNo); 173 const MCInstrDesc &MCID = MI->getDesc(); 174 175 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 176 // This is a texture fetch, so operand 4 is a texref and operand 5 is 177 // a samplerref 178 if (OpNo == 4 && MO.isImm()) { 179 lowerImageHandleSymbol(MO.getImm(), MCOp); 180 return true; 181 } 182 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 183 lowerImageHandleSymbol(MO.getImm(), MCOp); 184 return true; 185 } 186 187 return false; 188 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 189 unsigned VecSize = 190 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 191 192 // For a surface load of vector size N, the Nth operand will be the surfref 193 if (OpNo == VecSize && MO.isImm()) { 194 lowerImageHandleSymbol(MO.getImm(), MCOp); 195 return true; 196 } 197 198 return false; 199 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 200 // This is a surface store, so operand 0 is a surfref 201 if (OpNo == 0 && MO.isImm()) { 202 lowerImageHandleSymbol(MO.getImm(), MCOp); 203 return true; 204 } 205 206 return false; 207 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 208 // This is a query, so operand 1 is a surfref/texref 209 if (OpNo == 1 && MO.isImm()) { 210 lowerImageHandleSymbol(MO.getImm(), MCOp); 211 return true; 212 } 213 214 return false; 215 } 216 217 return false; 218 } 219 220 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 221 // Ewwww 222 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 223 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 224 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 225 const char *Sym = MFI->getImageHandleSymbol(Index); 226 std::string *SymNamePtr = 227 nvTM.getManagedStrPool()->getManagedString(Sym); 228 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol( 229 StringRef(SymNamePtr->c_str()))); 230 } 231 232 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 233 OutMI.setOpcode(MI->getOpcode()); 234 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 235 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 236 const MachineOperand &MO = MI->getOperand(0); 237 OutMI.addOperand(GetSymbolRef( 238 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 239 return; 240 } 241 242 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 243 const MachineOperand &MO = MI->getOperand(i); 244 245 MCOperand MCOp; 246 if (!nvptxSubtarget->hasImageHandles()) { 247 if (lowerImageHandleOperand(MI, i, MCOp)) { 248 OutMI.addOperand(MCOp); 249 continue; 250 } 251 } 252 253 if (lowerOperand(MO, MCOp)) 254 OutMI.addOperand(MCOp); 255 } 256 } 257 258 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 259 MCOperand &MCOp) { 260 switch (MO.getType()) { 261 default: llvm_unreachable("unknown operand type"); 262 case MachineOperand::MO_Register: 263 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 264 break; 265 case MachineOperand::MO_Immediate: 266 MCOp = MCOperand::createImm(MO.getImm()); 267 break; 268 case MachineOperand::MO_MachineBasicBlock: 269 MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 270 MO.getMBB()->getSymbol(), OutContext)); 271 break; 272 case MachineOperand::MO_ExternalSymbol: 273 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 274 break; 275 case MachineOperand::MO_GlobalAddress: 276 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 277 break; 278 case MachineOperand::MO_FPImmediate: { 279 const ConstantFP *Cnt = MO.getFPImm(); 280 const APFloat &Val = Cnt->getValueAPF(); 281 282 switch (Cnt->getType()->getTypeID()) { 283 default: report_fatal_error("Unsupported FP type"); break; 284 case Type::FloatTyID: 285 MCOp = MCOperand::createExpr( 286 NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 287 break; 288 case Type::DoubleTyID: 289 MCOp = MCOperand::createExpr( 290 NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 291 break; 292 } 293 break; 294 } 295 } 296 return true; 297 } 298 299 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 300 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 301 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 302 303 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 304 unsigned RegNum = RegMap[Reg]; 305 306 // Encode the register class in the upper 4 bits 307 // Must be kept in sync with NVPTXInstPrinter::printRegName 308 unsigned Ret = 0; 309 if (RC == &NVPTX::Int1RegsRegClass) { 310 Ret = (1 << 28); 311 } else if (RC == &NVPTX::Int16RegsRegClass) { 312 Ret = (2 << 28); 313 } else if (RC == &NVPTX::Int32RegsRegClass) { 314 Ret = (3 << 28); 315 } else if (RC == &NVPTX::Int64RegsRegClass) { 316 Ret = (4 << 28); 317 } else if (RC == &NVPTX::Float32RegsRegClass) { 318 Ret = (5 << 28); 319 } else if (RC == &NVPTX::Float64RegsRegClass) { 320 Ret = (6 << 28); 321 } else { 322 report_fatal_error("Bad register class"); 323 } 324 325 // Insert the vreg number 326 Ret |= (RegNum & 0x0FFFFFFF); 327 return Ret; 328 } else { 329 // Some special-use registers are actually physical registers. 330 // Encode this as the register class ID of 0 and the real register ID. 331 return Reg & 0x0FFFFFFF; 332 } 333 } 334 335 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 336 const MCExpr *Expr; 337 Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 338 OutContext); 339 return MCOperand::createExpr(Expr); 340 } 341 342 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 343 const DataLayout &DL = getDataLayout(); 344 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 345 346 Type *Ty = F->getReturnType(); 347 348 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 349 350 if (Ty->getTypeID() == Type::VoidTyID) 351 return; 352 353 O << " ("; 354 355 if (isABI) { 356 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 357 unsigned size = 0; 358 if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 359 size = ITy->getBitWidth(); 360 if (size < 32) 361 size = 32; 362 } else { 363 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 364 size = Ty->getPrimitiveSizeInBits(); 365 } 366 367 O << ".param .b" << size << " func_retval0"; 368 } else if (isa<PointerType>(Ty)) { 369 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 370 << " func_retval0"; 371 } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 372 unsigned totalsz = DL.getTypeAllocSize(Ty); 373 unsigned retAlignment = 0; 374 if (!llvm::getAlign(*F, 0, retAlignment)) 375 retAlignment = DL.getABITypeAlignment(Ty); 376 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 377 << "]"; 378 } else 379 llvm_unreachable("Unknown return type"); 380 } else { 381 SmallVector<EVT, 16> vtparts; 382 ComputeValueVTs(*TLI, DL, Ty, vtparts); 383 unsigned idx = 0; 384 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 385 unsigned elems = 1; 386 EVT elemtype = vtparts[i]; 387 if (vtparts[i].isVector()) { 388 elems = vtparts[i].getVectorNumElements(); 389 elemtype = vtparts[i].getVectorElementType(); 390 } 391 392 for (unsigned j = 0, je = elems; j != je; ++j) { 393 unsigned sz = elemtype.getSizeInBits(); 394 if (elemtype.isInteger() && (sz < 32)) 395 sz = 32; 396 O << ".reg .b" << sz << " func_retval" << idx; 397 if (j < je - 1) 398 O << ", "; 399 ++idx; 400 } 401 if (i < e - 1) 402 O << ", "; 403 } 404 } 405 O << ") "; 406 return; 407 } 408 409 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 410 raw_ostream &O) { 411 const Function *F = MF.getFunction(); 412 printReturnValStr(F, O); 413 } 414 415 // Return true if MBB is the header of a loop marked with 416 // llvm.loop.unroll.disable. 417 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 418 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 419 const MachineBasicBlock &MBB) const { 420 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 421 // We insert .pragma "nounroll" only to the loop header. 422 if (!LI.isLoopHeader(&MBB)) 423 return false; 424 425 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, 426 // we iterate through each back edge of the loop with header MBB, and check 427 // whether its metadata contains llvm.loop.unroll.disable. 428 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 429 const MachineBasicBlock *PMBB = *I; 430 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 431 // Edges from other loops to MBB are not back edges. 432 continue; 433 } 434 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 435 if (MDNode *LoopID = 436 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) { 437 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 438 return true; 439 } 440 } 441 } 442 return false; 443 } 444 445 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { 446 AsmPrinter::EmitBasicBlockStart(MBB); 447 if (isLoopHeaderOfNoUnroll(MBB)) 448 OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); 449 } 450 451 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 452 SmallString<128> Str; 453 raw_svector_ostream O(Str); 454 455 if (!GlobalsEmitted) { 456 emitGlobals(*MF->getFunction()->getParent()); 457 GlobalsEmitted = true; 458 } 459 460 // Set up 461 MRI = &MF->getRegInfo(); 462 F = MF->getFunction(); 463 emitLinkageDirective(F, O); 464 if (llvm::isKernelFunction(*F)) 465 O << ".entry "; 466 else { 467 O << ".func "; 468 printReturnValStr(*MF, O); 469 } 470 471 CurrentFnSym->print(O, MAI); 472 473 emitFunctionParamList(*MF, O); 474 475 if (llvm::isKernelFunction(*F)) 476 emitKernelFunctionDirectives(*F, O); 477 478 OutStreamer->EmitRawText(O.str()); 479 480 prevDebugLoc = DebugLoc(); 481 } 482 483 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 484 VRegMapping.clear(); 485 OutStreamer->EmitRawText(StringRef("{\n")); 486 setAndEmitFunctionVirtualRegisters(*MF); 487 488 SmallString<128> Str; 489 raw_svector_ostream O(Str); 490 emitDemotedVars(MF->getFunction(), O); 491 OutStreamer->EmitRawText(O.str()); 492 } 493 494 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 495 OutStreamer->EmitRawText(StringRef("}\n")); 496 VRegMapping.clear(); 497 } 498 499 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 500 unsigned RegNo = MI->getOperand(0).getReg(); 501 if (TargetRegisterInfo::isVirtualRegister(RegNo)) { 502 OutStreamer->AddComment(Twine("implicit-def: ") + 503 getVirtualRegisterName(RegNo)); 504 } else { 505 OutStreamer->AddComment(Twine("implicit-def: ") + 506 nvptxSubtarget->getRegisterInfo()->getName(RegNo)); 507 } 508 OutStreamer->AddBlankLine(); 509 } 510 511 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 512 raw_ostream &O) const { 513 // If the NVVM IR has some of reqntid* specified, then output 514 // the reqntid directive, and set the unspecified ones to 1. 515 // If none of reqntid* is specified, don't output reqntid directive. 516 unsigned reqntidx, reqntidy, reqntidz; 517 bool specified = false; 518 if (!llvm::getReqNTIDx(F, reqntidx)) 519 reqntidx = 1; 520 else 521 specified = true; 522 if (!llvm::getReqNTIDy(F, reqntidy)) 523 reqntidy = 1; 524 else 525 specified = true; 526 if (!llvm::getReqNTIDz(F, reqntidz)) 527 reqntidz = 1; 528 else 529 specified = true; 530 531 if (specified) 532 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 533 << "\n"; 534 535 // If the NVVM IR has some of maxntid* specified, then output 536 // the maxntid directive, and set the unspecified ones to 1. 537 // If none of maxntid* is specified, don't output maxntid directive. 538 unsigned maxntidx, maxntidy, maxntidz; 539 specified = false; 540 if (!llvm::getMaxNTIDx(F, maxntidx)) 541 maxntidx = 1; 542 else 543 specified = true; 544 if (!llvm::getMaxNTIDy(F, maxntidy)) 545 maxntidy = 1; 546 else 547 specified = true; 548 if (!llvm::getMaxNTIDz(F, maxntidz)) 549 maxntidz = 1; 550 else 551 specified = true; 552 553 if (specified) 554 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 555 << "\n"; 556 557 unsigned mincta; 558 if (llvm::getMinCTASm(F, mincta)) 559 O << ".minnctapersm " << mincta << "\n"; 560 } 561 562 std::string 563 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 564 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 565 566 std::string Name; 567 raw_string_ostream NameStr(Name); 568 569 VRegRCMap::const_iterator I = VRegMapping.find(RC); 570 assert(I != VRegMapping.end() && "Bad register class"); 571 const DenseMap<unsigned, unsigned> &RegMap = I->second; 572 573 VRegMap::const_iterator VI = RegMap.find(Reg); 574 assert(VI != RegMap.end() && "Bad virtual register"); 575 unsigned MappedVR = VI->second; 576 577 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 578 579 NameStr.flush(); 580 return Name; 581 } 582 583 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 584 raw_ostream &O) { 585 O << getVirtualRegisterName(vr); 586 } 587 588 void NVPTXAsmPrinter::printVecModifiedImmediate( 589 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 590 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 591 int Imm = (int) MO.getImm(); 592 if (0 == strcmp(Modifier, "vecelem")) 593 O << "_" << vecelem[Imm]; 594 else if (0 == strcmp(Modifier, "vecv4comm1")) { 595 if ((Imm < 0) || (Imm > 3)) 596 O << "//"; 597 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 598 if ((Imm < 4) || (Imm > 7)) 599 O << "//"; 600 } else if (0 == strcmp(Modifier, "vecv4pos")) { 601 if (Imm < 0) 602 Imm = 0; 603 O << "_" << vecelem[Imm % 4]; 604 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 605 if ((Imm < 0) || (Imm > 1)) 606 O << "//"; 607 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 608 if ((Imm < 2) || (Imm > 3)) 609 O << "//"; 610 } else if (0 == strcmp(Modifier, "vecv2pos")) { 611 if (Imm < 0) 612 Imm = 0; 613 O << "_" << vecelem[Imm % 2]; 614 } else 615 llvm_unreachable("Unknown Modifier on immediate operand"); 616 } 617 618 619 620 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 621 622 emitLinkageDirective(F, O); 623 if (llvm::isKernelFunction(*F)) 624 O << ".entry "; 625 else 626 O << ".func "; 627 printReturnValStr(F, O); 628 getSymbol(F)->print(O, MAI); 629 O << "\n"; 630 emitFunctionParamList(F, O); 631 O << ";\n"; 632 } 633 634 static bool usedInGlobalVarDef(const Constant *C) { 635 if (!C) 636 return false; 637 638 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 639 return GV->getName() != "llvm.used"; 640 } 641 642 for (const User *U : C->users()) 643 if (const Constant *C = dyn_cast<Constant>(U)) 644 if (usedInGlobalVarDef(C)) 645 return true; 646 647 return false; 648 } 649 650 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 651 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 652 if (othergv->getName() == "llvm.used") 653 return true; 654 } 655 656 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 657 if (instr->getParent() && instr->getParent()->getParent()) { 658 const Function *curFunc = instr->getParent()->getParent(); 659 if (oneFunc && (curFunc != oneFunc)) 660 return false; 661 oneFunc = curFunc; 662 return true; 663 } else 664 return false; 665 } 666 667 for (const User *UU : U->users()) 668 if (!usedInOneFunc(UU, oneFunc)) 669 return false; 670 671 return true; 672 } 673 674 /* Find out if a global variable can be demoted to local scope. 675 * Currently, this is valid for CUDA shared variables, which have local 676 * scope and global lifetime. So the conditions to check are : 677 * 1. Is the global variable in shared address space? 678 * 2. Does it have internal linkage? 679 * 3. Is the global variable referenced only in one function? 680 */ 681 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 682 if (!gv->hasInternalLinkage()) 683 return false; 684 PointerType *Pty = gv->getType(); 685 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 686 return false; 687 688 const Function *oneFunc = nullptr; 689 690 bool flag = usedInOneFunc(gv, oneFunc); 691 if (!flag) 692 return false; 693 if (!oneFunc) 694 return false; 695 f = oneFunc; 696 return true; 697 } 698 699 static bool useFuncSeen(const Constant *C, 700 llvm::DenseMap<const Function *, bool> &seenMap) { 701 for (const User *U : C->users()) { 702 if (const Constant *cu = dyn_cast<Constant>(U)) { 703 if (useFuncSeen(cu, seenMap)) 704 return true; 705 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 706 const BasicBlock *bb = I->getParent(); 707 if (!bb) 708 continue; 709 const Function *caller = bb->getParent(); 710 if (!caller) 711 continue; 712 if (seenMap.find(caller) != seenMap.end()) 713 return true; 714 } 715 } 716 return false; 717 } 718 719 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 720 llvm::DenseMap<const Function *, bool> seenMap; 721 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 722 const Function *F = &*FI; 723 724 if (F->isDeclaration()) { 725 if (F->use_empty()) 726 continue; 727 if (F->getIntrinsicID()) 728 continue; 729 emitDeclaration(F, O); 730 continue; 731 } 732 for (const User *U : F->users()) { 733 if (const Constant *C = dyn_cast<Constant>(U)) { 734 if (usedInGlobalVarDef(C)) { 735 // The use is in the initialization of a global variable 736 // that is a function pointer, so print a declaration 737 // for the original function 738 emitDeclaration(F, O); 739 break; 740 } 741 // Emit a declaration of this function if the function that 742 // uses this constant expr has already been seen. 743 if (useFuncSeen(C, seenMap)) { 744 emitDeclaration(F, O); 745 break; 746 } 747 } 748 749 if (!isa<Instruction>(U)) 750 continue; 751 const Instruction *instr = cast<Instruction>(U); 752 const BasicBlock *bb = instr->getParent(); 753 if (!bb) 754 continue; 755 const Function *caller = bb->getParent(); 756 if (!caller) 757 continue; 758 759 // If a caller has already been seen, then the caller is 760 // appearing in the module before the callee. so print out 761 // a declaration for the callee. 762 if (seenMap.find(caller) != seenMap.end()) { 763 emitDeclaration(F, O); 764 break; 765 } 766 } 767 seenMap[F] = true; 768 } 769 } 770 771 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 772 DebugInfoFinder DbgFinder; 773 DbgFinder.processModule(M); 774 775 unsigned i = 1; 776 for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) { 777 StringRef Filename = DIUnit->getFilename(); 778 StringRef Dirname = DIUnit->getDirectory(); 779 SmallString<128> FullPathName = Dirname; 780 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 781 sys::path::append(FullPathName, Filename); 782 Filename = FullPathName; 783 } 784 if (filenameMap.find(Filename) != filenameMap.end()) 785 continue; 786 filenameMap[Filename] = i; 787 OutStreamer->EmitDwarfFileDirective(i, "", Filename); 788 ++i; 789 } 790 791 for (DISubprogram *SP : DbgFinder.subprograms()) { 792 StringRef Filename = SP->getFilename(); 793 StringRef Dirname = SP->getDirectory(); 794 SmallString<128> FullPathName = Dirname; 795 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 796 sys::path::append(FullPathName, Filename); 797 Filename = FullPathName; 798 } 799 if (filenameMap.find(Filename) != filenameMap.end()) 800 continue; 801 filenameMap[Filename] = i; 802 OutStreamer->EmitDwarfFileDirective(i, "", Filename); 803 ++i; 804 } 805 } 806 807 static bool isEmptyXXStructor(GlobalVariable *GV) { 808 if (!GV) return true; 809 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); 810 if (!InitList) return true; // Not an array; we don't know how to parse. 811 return InitList->getNumOperands() == 0; 812 } 813 814 bool NVPTXAsmPrinter::doInitialization(Module &M) { 815 // Construct a default subtarget off of the TargetMachine defaults. The 816 // rest of NVPTX isn't friendly to change subtargets per function and 817 // so the default TargetMachine will have all of the options. 818 const Triple &TT = TM.getTargetTriple(); 819 StringRef CPU = TM.getTargetCPU(); 820 StringRef FS = TM.getTargetFeatureString(); 821 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 822 const NVPTXSubtarget STI(TT, CPU, FS, NTM); 823 824 if (M.alias_size()) { 825 report_fatal_error("Module has aliases, which NVPTX does not support."); 826 return true; // error 827 } 828 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) { 829 report_fatal_error( 830 "Module has a nontrivial global ctor, which NVPTX does not support."); 831 return true; // error 832 } 833 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) { 834 report_fatal_error( 835 "Module has a nontrivial global dtor, which NVPTX does not support."); 836 return true; // error 837 } 838 839 SmallString<128> Str1; 840 raw_svector_ostream OS1(Str1); 841 842 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 843 844 // We need to call the parent's one explicitly. 845 //bool Result = AsmPrinter::doInitialization(M); 846 847 // Initialize TargetLoweringObjectFile. 848 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 849 .Initialize(OutContext, TM); 850 851 Mang = new Mangler(); 852 853 // Emit header before any dwarf directives are emitted below. 854 emitHeader(M, OS1, STI); 855 OutStreamer->EmitRawText(OS1.str()); 856 857 // Already commented out 858 //bool Result = AsmPrinter::doInitialization(M); 859 860 // Emit module-level inline asm if it exists. 861 if (!M.getModuleInlineAsm().empty()) { 862 OutStreamer->AddComment("Start of file scope inline assembly"); 863 OutStreamer->AddBlankLine(); 864 OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm())); 865 OutStreamer->AddBlankLine(); 866 OutStreamer->AddComment("End of file scope inline assembly"); 867 OutStreamer->AddBlankLine(); 868 } 869 870 // If we're not NVCL we're CUDA, go ahead and emit filenames. 871 if (TM.getTargetTriple().getOS() != Triple::NVCL) 872 recordAndEmitFilenames(M); 873 874 GlobalsEmitted = false; 875 876 return false; // success 877 } 878 879 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 880 SmallString<128> Str2; 881 raw_svector_ostream OS2(Str2); 882 883 emitDeclarations(M, OS2); 884 885 // As ptxas does not support forward references of globals, we need to first 886 // sort the list of module-level globals in def-use order. We visit each 887 // global variable in order, and ensure that we emit it *after* its dependent 888 // globals. We use a little extra memory maintaining both a set and a list to 889 // have fast searches while maintaining a strict ordering. 890 SmallVector<const GlobalVariable *, 8> Globals; 891 DenseSet<const GlobalVariable *> GVVisited; 892 DenseSet<const GlobalVariable *> GVVisiting; 893 894 // Visit each global variable, in order 895 for (const GlobalVariable &I : M.globals()) 896 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 897 898 assert(GVVisited.size() == M.getGlobalList().size() && 899 "Missed a global variable"); 900 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 901 902 // Print out module-level global variables in proper order 903 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 904 printModuleLevelGV(Globals[i], OS2); 905 906 OS2 << '\n'; 907 908 OutStreamer->EmitRawText(OS2.str()); 909 } 910 911 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 912 const NVPTXSubtarget &STI) { 913 O << "//\n"; 914 O << "// Generated by LLVM NVPTX Back-End\n"; 915 O << "//\n"; 916 O << "\n"; 917 918 unsigned PTXVersion = STI.getPTXVersion(); 919 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 920 921 O << ".target "; 922 O << STI.getTargetName(); 923 924 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 925 if (NTM.getDrvInterface() == NVPTX::NVCL) 926 O << ", texmode_independent"; 927 else { 928 if (!STI.hasDouble()) 929 O << ", map_f64_to_f32"; 930 } 931 932 if (MAI->doesSupportDebugInformation()) 933 O << ", debug"; 934 935 O << "\n"; 936 937 O << ".address_size "; 938 if (NTM.is64Bit()) 939 O << "64"; 940 else 941 O << "32"; 942 O << "\n"; 943 944 O << "\n"; 945 } 946 947 bool NVPTXAsmPrinter::doFinalization(Module &M) { 948 // If we did not emit any functions, then the global declarations have not 949 // yet been emitted. 950 if (!GlobalsEmitted) { 951 emitGlobals(M); 952 GlobalsEmitted = true; 953 } 954 955 // XXX Temproarily remove global variables so that doFinalization() will not 956 // emit them again (global variables are emitted at beginning). 957 958 Module::GlobalListType &global_list = M.getGlobalList(); 959 int i, n = global_list.size(); 960 GlobalVariable **gv_array = new GlobalVariable *[n]; 961 962 // first, back-up GlobalVariable in gv_array 963 i = 0; 964 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 965 I != E; ++I) 966 gv_array[i++] = &*I; 967 968 // second, empty global_list 969 while (!global_list.empty()) 970 global_list.remove(global_list.begin()); 971 972 // call doFinalization 973 bool ret = AsmPrinter::doFinalization(M); 974 975 // now we restore global variables 976 for (i = 0; i < n; i++) 977 global_list.insert(global_list.end(), gv_array[i]); 978 979 clearAnnotationCache(&M); 980 981 delete[] gv_array; 982 return ret; 983 984 //bool Result = AsmPrinter::doFinalization(M); 985 // Instead of calling the parents doFinalization, we may 986 // clone parents doFinalization and customize here. 987 // Currently, we if NVISA out the EmitGlobals() in 988 // parent's doFinalization, which is too intrusive. 989 // 990 // Same for the doInitialization. 991 //return Result; 992 } 993 994 // This function emits appropriate linkage directives for 995 // functions and global variables. 996 // 997 // extern function declaration -> .extern 998 // extern function definition -> .visible 999 // external global variable with init -> .visible 1000 // external without init -> .extern 1001 // appending -> not allowed, assert. 1002 // for any linkage other than 1003 // internal, private, linker_private, 1004 // linker_private_weak, linker_private_weak_def_auto, 1005 // we emit -> .weak. 1006 1007 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1008 raw_ostream &O) { 1009 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 1010 if (V->hasExternalLinkage()) { 1011 if (isa<GlobalVariable>(V)) { 1012 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1013 if (GVar) { 1014 if (GVar->hasInitializer()) 1015 O << ".visible "; 1016 else 1017 O << ".extern "; 1018 } 1019 } else if (V->isDeclaration()) 1020 O << ".extern "; 1021 else 1022 O << ".visible "; 1023 } else if (V->hasAppendingLinkage()) { 1024 std::string msg; 1025 msg.append("Error: "); 1026 msg.append("Symbol "); 1027 if (V->hasName()) 1028 msg.append(V->getName()); 1029 msg.append("has unsupported appending linkage type"); 1030 llvm_unreachable(msg.c_str()); 1031 } else if (!V->hasInternalLinkage() && 1032 !V->hasPrivateLinkage()) { 1033 O << ".weak "; 1034 } 1035 } 1036 } 1037 1038 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1039 raw_ostream &O, 1040 bool processDemoted) { 1041 1042 // Skip meta data 1043 if (GVar->hasSection()) { 1044 if (GVar->getSection() == "llvm.metadata") 1045 return; 1046 } 1047 1048 // Skip LLVM intrinsic global variables 1049 if (GVar->getName().startswith("llvm.") || 1050 GVar->getName().startswith("nvvm.")) 1051 return; 1052 1053 const DataLayout &DL = getDataLayout(); 1054 1055 // GlobalVariables are always constant pointers themselves. 1056 PointerType *PTy = GVar->getType(); 1057 Type *ETy = GVar->getValueType(); 1058 1059 if (GVar->hasExternalLinkage()) { 1060 if (GVar->hasInitializer()) 1061 O << ".visible "; 1062 else 1063 O << ".extern "; 1064 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1065 GVar->hasAvailableExternallyLinkage() || 1066 GVar->hasCommonLinkage()) { 1067 O << ".weak "; 1068 } 1069 1070 if (llvm::isTexture(*GVar)) { 1071 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1072 return; 1073 } 1074 1075 if (llvm::isSurface(*GVar)) { 1076 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1077 return; 1078 } 1079 1080 if (GVar->isDeclaration()) { 1081 // (extern) declarations, no definition or initializer 1082 // Currently the only known declaration is for an automatic __local 1083 // (.shared) promoted to global. 1084 emitPTXGlobalVariable(GVar, O); 1085 O << ";\n"; 1086 return; 1087 } 1088 1089 if (llvm::isSampler(*GVar)) { 1090 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1091 1092 const Constant *Initializer = nullptr; 1093 if (GVar->hasInitializer()) 1094 Initializer = GVar->getInitializer(); 1095 const ConstantInt *CI = nullptr; 1096 if (Initializer) 1097 CI = dyn_cast<ConstantInt>(Initializer); 1098 if (CI) { 1099 unsigned sample = CI->getZExtValue(); 1100 1101 O << " = { "; 1102 1103 for (int i = 0, 1104 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1105 i < 3; i++) { 1106 O << "addr_mode_" << i << " = "; 1107 switch (addr) { 1108 case 0: 1109 O << "wrap"; 1110 break; 1111 case 1: 1112 O << "clamp_to_border"; 1113 break; 1114 case 2: 1115 O << "clamp_to_edge"; 1116 break; 1117 case 3: 1118 O << "wrap"; 1119 break; 1120 case 4: 1121 O << "mirror"; 1122 break; 1123 } 1124 O << ", "; 1125 } 1126 O << "filter_mode = "; 1127 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1128 case 0: 1129 O << "nearest"; 1130 break; 1131 case 1: 1132 O << "linear"; 1133 break; 1134 case 2: 1135 llvm_unreachable("Anisotropic filtering is not supported"); 1136 default: 1137 O << "nearest"; 1138 break; 1139 } 1140 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1141 O << ", force_unnormalized_coords = 1"; 1142 } 1143 O << " }"; 1144 } 1145 1146 O << ";\n"; 1147 return; 1148 } 1149 1150 if (GVar->hasPrivateLinkage()) { 1151 1152 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1153 return; 1154 1155 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1156 if (!strncmp(GVar->getName().data(), "filename", 8)) 1157 return; 1158 if (GVar->use_empty()) 1159 return; 1160 } 1161 1162 const Function *demotedFunc = nullptr; 1163 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1164 O << "// " << GVar->getName() << " has been demoted\n"; 1165 if (localDecls.find(demotedFunc) != localDecls.end()) 1166 localDecls[demotedFunc].push_back(GVar); 1167 else { 1168 std::vector<const GlobalVariable *> temp; 1169 temp.push_back(GVar); 1170 localDecls[demotedFunc] = temp; 1171 } 1172 return; 1173 } 1174 1175 O << "."; 1176 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1177 1178 if (isManaged(*GVar)) { 1179 O << " .attribute(.managed)"; 1180 } 1181 1182 if (GVar->getAlignment() == 0) 1183 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1184 else 1185 O << " .align " << GVar->getAlignment(); 1186 1187 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1188 O << " ."; 1189 // Special case: ABI requires that we use .u8 for predicates 1190 if (ETy->isIntegerTy(1)) 1191 O << "u8"; 1192 else 1193 O << getPTXFundamentalTypeStr(ETy, false); 1194 O << " "; 1195 getSymbol(GVar)->print(O, MAI); 1196 1197 // Ptx allows variable initilization only for constant and global state 1198 // spaces. 1199 if (GVar->hasInitializer()) { 1200 if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1201 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { 1202 const Constant *Initializer = GVar->getInitializer(); 1203 // 'undef' is treated as there is no value specified. 1204 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1205 O << " = "; 1206 printScalarConstant(Initializer, O); 1207 } 1208 } else { 1209 // The frontend adds zero-initializer to device and constant variables 1210 // that don't have an initial value, and UndefValue to shared 1211 // variables, so skip warning for this case. 1212 if (!GVar->getInitializer()->isNullValue() && 1213 !isa<UndefValue>(GVar->getInitializer())) { 1214 report_fatal_error("initial value of '" + GVar->getName() + 1215 "' is not allowed in addrspace(" + 1216 Twine(PTy->getAddressSpace()) + ")"); 1217 } 1218 } 1219 } 1220 } else { 1221 unsigned int ElementSize = 0; 1222 1223 // Although PTX has direct support for struct type and array type and 1224 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1225 // targets that support these high level field accesses. Structs, arrays 1226 // and vectors are lowered into arrays of bytes. 1227 switch (ETy->getTypeID()) { 1228 case Type::StructTyID: 1229 case Type::ArrayTyID: 1230 case Type::VectorTyID: 1231 ElementSize = DL.getTypeStoreSize(ETy); 1232 // Ptx allows variable initilization only for constant and 1233 // global state spaces. 1234 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1235 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1236 GVar->hasInitializer()) { 1237 const Constant *Initializer = GVar->getInitializer(); 1238 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1239 AggBuffer aggBuffer(ElementSize, O, *this); 1240 bufferAggregateConstant(Initializer, &aggBuffer); 1241 if (aggBuffer.numSymbols) { 1242 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1243 O << " .u64 "; 1244 getSymbol(GVar)->print(O, MAI); 1245 O << "["; 1246 O << ElementSize / 8; 1247 } else { 1248 O << " .u32 "; 1249 getSymbol(GVar)->print(O, MAI); 1250 O << "["; 1251 O << ElementSize / 4; 1252 } 1253 O << "]"; 1254 } else { 1255 O << " .b8 "; 1256 getSymbol(GVar)->print(O, MAI); 1257 O << "["; 1258 O << ElementSize; 1259 O << "]"; 1260 } 1261 O << " = {"; 1262 aggBuffer.print(); 1263 O << "}"; 1264 } else { 1265 O << " .b8 "; 1266 getSymbol(GVar)->print(O, MAI); 1267 if (ElementSize) { 1268 O << "["; 1269 O << ElementSize; 1270 O << "]"; 1271 } 1272 } 1273 } else { 1274 O << " .b8 "; 1275 getSymbol(GVar)->print(O, MAI); 1276 if (ElementSize) { 1277 O << "["; 1278 O << ElementSize; 1279 O << "]"; 1280 } 1281 } 1282 break; 1283 default: 1284 llvm_unreachable("type not supported yet"); 1285 } 1286 1287 } 1288 O << ";\n"; 1289 } 1290 1291 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1292 if (localDecls.find(f) == localDecls.end()) 1293 return; 1294 1295 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1296 1297 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1298 O << "\t// demoted variable\n\t"; 1299 printModuleLevelGV(gvars[i], O, true); 1300 } 1301 } 1302 1303 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1304 raw_ostream &O) const { 1305 switch (AddressSpace) { 1306 case llvm::ADDRESS_SPACE_LOCAL: 1307 O << "local"; 1308 break; 1309 case llvm::ADDRESS_SPACE_GLOBAL: 1310 O << "global"; 1311 break; 1312 case llvm::ADDRESS_SPACE_CONST: 1313 O << "const"; 1314 break; 1315 case llvm::ADDRESS_SPACE_SHARED: 1316 O << "shared"; 1317 break; 1318 default: 1319 report_fatal_error("Bad address space found while emitting PTX"); 1320 break; 1321 } 1322 } 1323 1324 std::string 1325 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 1326 switch (Ty->getTypeID()) { 1327 default: 1328 llvm_unreachable("unexpected type"); 1329 break; 1330 case Type::IntegerTyID: { 1331 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1332 if (NumBits == 1) 1333 return "pred"; 1334 else if (NumBits <= 64) { 1335 std::string name = "u"; 1336 return name + utostr(NumBits); 1337 } else { 1338 llvm_unreachable("Integer too large"); 1339 break; 1340 } 1341 break; 1342 } 1343 case Type::FloatTyID: 1344 return "f32"; 1345 case Type::DoubleTyID: 1346 return "f64"; 1347 case Type::PointerTyID: 1348 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1349 if (useB4PTR) 1350 return "b64"; 1351 else 1352 return "u64"; 1353 else if (useB4PTR) 1354 return "b32"; 1355 else 1356 return "u32"; 1357 } 1358 llvm_unreachable("unexpected type"); 1359 return nullptr; 1360 } 1361 1362 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1363 raw_ostream &O) { 1364 1365 const DataLayout &DL = getDataLayout(); 1366 1367 // GlobalVariables are always constant pointers themselves. 1368 Type *ETy = GVar->getValueType(); 1369 1370 O << "."; 1371 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O); 1372 if (GVar->getAlignment() == 0) 1373 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1374 else 1375 O << " .align " << GVar->getAlignment(); 1376 1377 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1378 O << " ."; 1379 O << getPTXFundamentalTypeStr(ETy); 1380 O << " "; 1381 getSymbol(GVar)->print(O, MAI); 1382 return; 1383 } 1384 1385 int64_t ElementSize = 0; 1386 1387 // Although PTX has direct support for struct type and array type and LLVM IR 1388 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1389 // support these high level field accesses. Structs and arrays are lowered 1390 // into arrays of bytes. 1391 switch (ETy->getTypeID()) { 1392 case Type::StructTyID: 1393 case Type::ArrayTyID: 1394 case Type::VectorTyID: 1395 ElementSize = DL.getTypeStoreSize(ETy); 1396 O << " .b8 "; 1397 getSymbol(GVar)->print(O, MAI); 1398 O << "["; 1399 if (ElementSize) { 1400 O << ElementSize; 1401 } 1402 O << "]"; 1403 break; 1404 default: 1405 llvm_unreachable("type not supported yet"); 1406 } 1407 return; 1408 } 1409 1410 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { 1411 if (Ty->isSingleValueType()) 1412 return DL.getPrefTypeAlignment(Ty); 1413 1414 auto *ATy = dyn_cast<ArrayType>(Ty); 1415 if (ATy) 1416 return getOpenCLAlignment(DL, ATy->getElementType()); 1417 1418 auto *STy = dyn_cast<StructType>(Ty); 1419 if (STy) { 1420 unsigned int alignStruct = 1; 1421 // Go through each element of the struct and find the 1422 // largest alignment. 1423 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1424 Type *ETy = STy->getElementType(i); 1425 unsigned int align = getOpenCLAlignment(DL, ETy); 1426 if (align > alignStruct) 1427 alignStruct = align; 1428 } 1429 return alignStruct; 1430 } 1431 1432 auto *FTy = dyn_cast<FunctionType>(Ty); 1433 if (FTy) 1434 return DL.getPointerPrefAlignment(); 1435 return DL.getPrefTypeAlignment(Ty); 1436 } 1437 1438 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1439 int paramIndex, raw_ostream &O) { 1440 getSymbol(I->getParent())->print(O, MAI); 1441 O << "_param_" << paramIndex; 1442 } 1443 1444 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1445 const DataLayout &DL = getDataLayout(); 1446 const AttributeSet &PAL = F->getAttributes(); 1447 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 1448 Function::const_arg_iterator I, E; 1449 unsigned paramIndex = 0; 1450 bool first = true; 1451 bool isKernelFunc = llvm::isKernelFunction(*F); 1452 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 1453 MVT thePointerTy = TLI->getPointerTy(DL); 1454 1455 if (F->arg_empty()) { 1456 O << "()\n"; 1457 return; 1458 } 1459 1460 O << "(\n"; 1461 1462 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1463 Type *Ty = I->getType(); 1464 1465 if (!first) 1466 O << ",\n"; 1467 1468 first = false; 1469 1470 // Handle image/sampler parameters 1471 if (isKernelFunction(*F)) { 1472 if (isSampler(*I) || isImage(*I)) { 1473 if (isImage(*I)) { 1474 std::string sname = I->getName(); 1475 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1476 if (nvptxSubtarget->hasImageHandles()) 1477 O << "\t.param .u64 .ptr .surfref "; 1478 else 1479 O << "\t.param .surfref "; 1480 CurrentFnSym->print(O, MAI); 1481 O << "_param_" << paramIndex; 1482 } 1483 else { // Default image is read_only 1484 if (nvptxSubtarget->hasImageHandles()) 1485 O << "\t.param .u64 .ptr .texref "; 1486 else 1487 O << "\t.param .texref "; 1488 CurrentFnSym->print(O, MAI); 1489 O << "_param_" << paramIndex; 1490 } 1491 } else { 1492 if (nvptxSubtarget->hasImageHandles()) 1493 O << "\t.param .u64 .ptr .samplerref "; 1494 else 1495 O << "\t.param .samplerref "; 1496 CurrentFnSym->print(O, MAI); 1497 O << "_param_" << paramIndex; 1498 } 1499 continue; 1500 } 1501 } 1502 1503 if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { 1504 if (Ty->isAggregateType() || Ty->isVectorTy()) { 1505 // Just print .param .align <a> .b8 .param[size]; 1506 // <a> = PAL.getparamalignment 1507 // size = typeallocsize of element type 1508 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1509 if (align == 0) 1510 align = DL.getABITypeAlignment(Ty); 1511 1512 unsigned sz = DL.getTypeAllocSize(Ty); 1513 O << "\t.param .align " << align << " .b8 "; 1514 printParamName(I, paramIndex, O); 1515 O << "[" << sz << "]"; 1516 1517 continue; 1518 } 1519 // Just a scalar 1520 auto *PTy = dyn_cast<PointerType>(Ty); 1521 if (isKernelFunc) { 1522 if (PTy) { 1523 // Special handling for pointer arguments to kernel 1524 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1525 1526 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 1527 NVPTX::CUDA) { 1528 Type *ETy = PTy->getElementType(); 1529 int addrSpace = PTy->getAddressSpace(); 1530 switch (addrSpace) { 1531 default: 1532 O << ".ptr "; 1533 break; 1534 case llvm::ADDRESS_SPACE_CONST: 1535 O << ".ptr .const "; 1536 break; 1537 case llvm::ADDRESS_SPACE_SHARED: 1538 O << ".ptr .shared "; 1539 break; 1540 case llvm::ADDRESS_SPACE_GLOBAL: 1541 O << ".ptr .global "; 1542 break; 1543 } 1544 O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; 1545 } 1546 printParamName(I, paramIndex, O); 1547 continue; 1548 } 1549 1550 // non-pointer scalar to kernel func 1551 O << "\t.param ."; 1552 // Special case: predicate operands become .u8 types 1553 if (Ty->isIntegerTy(1)) 1554 O << "u8"; 1555 else 1556 O << getPTXFundamentalTypeStr(Ty); 1557 O << " "; 1558 printParamName(I, paramIndex, O); 1559 continue; 1560 } 1561 // Non-kernel function, just print .param .b<size> for ABI 1562 // and .reg .b<size> for non-ABI 1563 unsigned sz = 0; 1564 if (isa<IntegerType>(Ty)) { 1565 sz = cast<IntegerType>(Ty)->getBitWidth(); 1566 if (sz < 32) 1567 sz = 32; 1568 } else if (isa<PointerType>(Ty)) 1569 sz = thePointerTy.getSizeInBits(); 1570 else 1571 sz = Ty->getPrimitiveSizeInBits(); 1572 if (isABI) 1573 O << "\t.param .b" << sz << " "; 1574 else 1575 O << "\t.reg .b" << sz << " "; 1576 printParamName(I, paramIndex, O); 1577 continue; 1578 } 1579 1580 // param has byVal attribute. So should be a pointer 1581 auto *PTy = dyn_cast<PointerType>(Ty); 1582 assert(PTy && "Param with byval attribute should be a pointer type"); 1583 Type *ETy = PTy->getElementType(); 1584 1585 if (isABI || isKernelFunc) { 1586 // Just print .param .align <a> .b8 .param[size]; 1587 // <a> = PAL.getparamalignment 1588 // size = typeallocsize of element type 1589 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1590 if (align == 0) 1591 align = DL.getABITypeAlignment(ETy); 1592 1593 unsigned sz = DL.getTypeAllocSize(ETy); 1594 O << "\t.param .align " << align << " .b8 "; 1595 printParamName(I, paramIndex, O); 1596 O << "[" << sz << "]"; 1597 continue; 1598 } else { 1599 // Split the ETy into constituent parts and 1600 // print .param .b<size> <name> for each part. 1601 // Further, if a part is vector, print the above for 1602 // each vector element. 1603 SmallVector<EVT, 16> vtparts; 1604 ComputeValueVTs(*TLI, DL, ETy, vtparts); 1605 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1606 unsigned elems = 1; 1607 EVT elemtype = vtparts[i]; 1608 if (vtparts[i].isVector()) { 1609 elems = vtparts[i].getVectorNumElements(); 1610 elemtype = vtparts[i].getVectorElementType(); 1611 } 1612 1613 for (unsigned j = 0, je = elems; j != je; ++j) { 1614 unsigned sz = elemtype.getSizeInBits(); 1615 if (elemtype.isInteger() && (sz < 32)) 1616 sz = 32; 1617 O << "\t.reg .b" << sz << " "; 1618 printParamName(I, paramIndex, O); 1619 if (j < je - 1) 1620 O << ",\n"; 1621 ++paramIndex; 1622 } 1623 if (i < e - 1) 1624 O << ",\n"; 1625 } 1626 --paramIndex; 1627 continue; 1628 } 1629 } 1630 1631 O << "\n)\n"; 1632 } 1633 1634 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1635 raw_ostream &O) { 1636 const Function *F = MF.getFunction(); 1637 emitFunctionParamList(F, O); 1638 } 1639 1640 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1641 const MachineFunction &MF) { 1642 SmallString<128> Str; 1643 raw_svector_ostream O(Str); 1644 1645 // Map the global virtual register number to a register class specific 1646 // virtual register number starting from 1 with that class. 1647 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1648 //unsigned numRegClasses = TRI->getNumRegClasses(); 1649 1650 // Emit the Fake Stack Object 1651 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1652 int NumBytes = (int) MFI->getStackSize(); 1653 if (NumBytes) { 1654 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1655 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1656 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 1657 O << "\t.reg .b64 \t%SP;\n"; 1658 O << "\t.reg .b64 \t%SPL;\n"; 1659 } else { 1660 O << "\t.reg .b32 \t%SP;\n"; 1661 O << "\t.reg .b32 \t%SPL;\n"; 1662 } 1663 } 1664 1665 // Go through all virtual registers to establish the mapping between the 1666 // global virtual 1667 // register number and the per class virtual register number. 1668 // We use the per class virtual register number in the ptx output. 1669 unsigned int numVRs = MRI->getNumVirtRegs(); 1670 for (unsigned i = 0; i < numVRs; i++) { 1671 unsigned int vr = TRI->index2VirtReg(i); 1672 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1673 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1674 int n = regmap.size(); 1675 regmap.insert(std::make_pair(vr, n + 1)); 1676 } 1677 1678 // Emit register declarations 1679 // @TODO: Extract out the real register usage 1680 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1681 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1682 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1683 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1684 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 1685 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1686 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 1687 1688 // Emit declaration of the virtual registers or 'physical' registers for 1689 // each register class 1690 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1691 const TargetRegisterClass *RC = TRI->getRegClass(i); 1692 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1693 std::string rcname = getNVPTXRegClassName(RC); 1694 std::string rcStr = getNVPTXRegClassStr(RC); 1695 int n = regmap.size(); 1696 1697 // Only declare those registers that may be used. 1698 if (n) { 1699 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1700 << ">;\n"; 1701 } 1702 } 1703 1704 OutStreamer->EmitRawText(O.str()); 1705 } 1706 1707 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1708 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1709 bool ignored; 1710 unsigned int numHex; 1711 const char *lead; 1712 1713 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1714 numHex = 8; 1715 lead = "0f"; 1716 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1717 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1718 numHex = 16; 1719 lead = "0d"; 1720 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1721 } else 1722 llvm_unreachable("unsupported fp type"); 1723 1724 APInt API = APF.bitcastToAPInt(); 1725 std::string hexstr(utohexstr(API.getZExtValue())); 1726 O << lead; 1727 if (hexstr.length() < numHex) 1728 O << std::string(numHex - hexstr.length(), '0'); 1729 O << utohexstr(API.getZExtValue()); 1730 } 1731 1732 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1733 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1734 O << CI->getValue(); 1735 return; 1736 } 1737 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1738 printFPConstant(CFP, O); 1739 return; 1740 } 1741 if (isa<ConstantPointerNull>(CPV)) { 1742 O << "0"; 1743 return; 1744 } 1745 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1746 bool IsNonGenericPointer = false; 1747 if (GVar->getType()->getAddressSpace() != 0) { 1748 IsNonGenericPointer = true; 1749 } 1750 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 1751 O << "generic("; 1752 getSymbol(GVar)->print(O, MAI); 1753 O << ")"; 1754 } else { 1755 getSymbol(GVar)->print(O, MAI); 1756 } 1757 return; 1758 } 1759 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1760 const Value *v = Cexpr->stripPointerCasts(); 1761 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 1762 bool IsNonGenericPointer = false; 1763 if (PTy && PTy->getAddressSpace() != 0) { 1764 IsNonGenericPointer = true; 1765 } 1766 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1767 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 1768 O << "generic("; 1769 getSymbol(GVar)->print(O, MAI); 1770 O << ")"; 1771 } else { 1772 getSymbol(GVar)->print(O, MAI); 1773 } 1774 return; 1775 } else { 1776 lowerConstant(CPV)->print(O, MAI); 1777 return; 1778 } 1779 } 1780 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1781 } 1782 1783 // These utility functions assure we get the right sequence of bytes for a given 1784 // type even for big-endian machines 1785 template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { 1786 int64_t vp = (int64_t)val; 1787 for (unsigned i = 0; i < sizeof(T); ++i) { 1788 p[i] = (unsigned char)vp; 1789 vp >>= 8; 1790 } 1791 } 1792 static void ConvertFloatToBytes(unsigned char *p, float val) { 1793 int32_t *vp = (int32_t *)&val; 1794 for (unsigned i = 0; i < sizeof(int32_t); ++i) { 1795 p[i] = (unsigned char)*vp; 1796 *vp >>= 8; 1797 } 1798 } 1799 static void ConvertDoubleToBytes(unsigned char *p, double val) { 1800 int64_t *vp = (int64_t *)&val; 1801 for (unsigned i = 0; i < sizeof(int64_t); ++i) { 1802 p[i] = (unsigned char)*vp; 1803 *vp >>= 8; 1804 } 1805 } 1806 1807 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1808 AggBuffer *aggBuffer) { 1809 1810 const DataLayout &DL = getDataLayout(); 1811 1812 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1813 int s = DL.getTypeAllocSize(CPV->getType()); 1814 if (s < Bytes) 1815 s = Bytes; 1816 aggBuffer->addZeros(s); 1817 return; 1818 } 1819 1820 unsigned char ptr[8]; 1821 switch (CPV->getType()->getTypeID()) { 1822 1823 case Type::IntegerTyID: { 1824 Type *ETy = CPV->getType(); 1825 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1826 unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); 1827 ConvertIntToBytes<>(ptr, c); 1828 aggBuffer->addBytes(ptr, 1, Bytes); 1829 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1830 short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); 1831 ConvertIntToBytes<>(ptr, int16); 1832 aggBuffer->addBytes(ptr, 2, Bytes); 1833 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1834 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1835 int int32 = (int)(constInt->getZExtValue()); 1836 ConvertIntToBytes<>(ptr, int32); 1837 aggBuffer->addBytes(ptr, 4, Bytes); 1838 break; 1839 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1840 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1841 ConstantFoldConstantExpression(Cexpr, DL))) { 1842 int int32 = (int)(constInt->getZExtValue()); 1843 ConvertIntToBytes<>(ptr, int32); 1844 aggBuffer->addBytes(ptr, 4, Bytes); 1845 break; 1846 } 1847 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1848 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1849 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1850 aggBuffer->addZeros(4); 1851 break; 1852 } 1853 } 1854 llvm_unreachable("unsupported integer const type"); 1855 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1856 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1857 long long int64 = (long long)(constInt->getZExtValue()); 1858 ConvertIntToBytes<>(ptr, int64); 1859 aggBuffer->addBytes(ptr, 8, Bytes); 1860 break; 1861 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1862 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1863 ConstantFoldConstantExpression(Cexpr, DL))) { 1864 long long int64 = (long long)(constInt->getZExtValue()); 1865 ConvertIntToBytes<>(ptr, int64); 1866 aggBuffer->addBytes(ptr, 8, Bytes); 1867 break; 1868 } 1869 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1870 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1871 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1872 aggBuffer->addZeros(8); 1873 break; 1874 } 1875 } 1876 llvm_unreachable("unsupported integer const type"); 1877 } else 1878 llvm_unreachable("unsupported integer const type"); 1879 break; 1880 } 1881 case Type::FloatTyID: 1882 case Type::DoubleTyID: { 1883 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1884 Type *Ty = CFP->getType(); 1885 if (Ty == Type::getFloatTy(CPV->getContext())) { 1886 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1887 ConvertFloatToBytes(ptr, float32); 1888 aggBuffer->addBytes(ptr, 4, Bytes); 1889 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1890 double float64 = CFP->getValueAPF().convertToDouble(); 1891 ConvertDoubleToBytes(ptr, float64); 1892 aggBuffer->addBytes(ptr, 8, Bytes); 1893 } else { 1894 llvm_unreachable("unsupported fp const type"); 1895 } 1896 break; 1897 } 1898 case Type::PointerTyID: { 1899 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1900 aggBuffer->addSymbol(GVar, GVar); 1901 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1902 const Value *v = Cexpr->stripPointerCasts(); 1903 aggBuffer->addSymbol(v, Cexpr); 1904 } 1905 unsigned int s = DL.getTypeAllocSize(CPV->getType()); 1906 aggBuffer->addZeros(s); 1907 break; 1908 } 1909 1910 case Type::ArrayTyID: 1911 case Type::VectorTyID: 1912 case Type::StructTyID: { 1913 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) { 1914 int ElementSize = DL.getTypeAllocSize(CPV->getType()); 1915 bufferAggregateConstant(CPV, aggBuffer); 1916 if (Bytes > ElementSize) 1917 aggBuffer->addZeros(Bytes - ElementSize); 1918 } else if (isa<ConstantAggregateZero>(CPV)) 1919 aggBuffer->addZeros(Bytes); 1920 else 1921 llvm_unreachable("Unexpected Constant type"); 1922 break; 1923 } 1924 1925 default: 1926 llvm_unreachable("unsupported type"); 1927 } 1928 } 1929 1930 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1931 AggBuffer *aggBuffer) { 1932 const DataLayout &DL = getDataLayout(); 1933 int Bytes; 1934 1935 // Old constants 1936 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1937 if (CPV->getNumOperands()) 1938 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1939 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1940 return; 1941 } 1942 1943 if (const ConstantDataSequential *CDS = 1944 dyn_cast<ConstantDataSequential>(CPV)) { 1945 if (CDS->getNumElements()) 1946 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1947 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1948 aggBuffer); 1949 return; 1950 } 1951 1952 if (isa<ConstantStruct>(CPV)) { 1953 if (CPV->getNumOperands()) { 1954 StructType *ST = cast<StructType>(CPV->getType()); 1955 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1956 if (i == (e - 1)) 1957 Bytes = DL.getStructLayout(ST)->getElementOffset(0) + 1958 DL.getTypeAllocSize(ST) - 1959 DL.getStructLayout(ST)->getElementOffset(i); 1960 else 1961 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - 1962 DL.getStructLayout(ST)->getElementOffset(i); 1963 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1964 } 1965 } 1966 return; 1967 } 1968 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1969 } 1970 1971 // buildTypeNameMap - Run through symbol table looking for type names. 1972 // 1973 1974 1975 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1976 switch (MI.getOpcode()) { 1977 default: 1978 return false; 1979 case NVPTX::CallArgBeginInst: 1980 case NVPTX::CallArgEndInst0: 1981 case NVPTX::CallArgEndInst1: 1982 case NVPTX::CallArgF32: 1983 case NVPTX::CallArgF64: 1984 case NVPTX::CallArgI16: 1985 case NVPTX::CallArgI32: 1986 case NVPTX::CallArgI32imm: 1987 case NVPTX::CallArgI64: 1988 case NVPTX::CallArgParam: 1989 case NVPTX::CallVoidInst: 1990 case NVPTX::CallVoidInstReg: 1991 case NVPTX::Callseq_End: 1992 case NVPTX::CallVoidInstReg64: 1993 case NVPTX::DeclareParamInst: 1994 case NVPTX::DeclareRetMemInst: 1995 case NVPTX::DeclareRetRegInst: 1996 case NVPTX::DeclareRetScalarInst: 1997 case NVPTX::DeclareScalarParamInst: 1998 case NVPTX::DeclareScalarRegInst: 1999 case NVPTX::StoreParamF32: 2000 case NVPTX::StoreParamF64: 2001 case NVPTX::StoreParamI16: 2002 case NVPTX::StoreParamI32: 2003 case NVPTX::StoreParamI64: 2004 case NVPTX::StoreParamI8: 2005 case NVPTX::StoreRetvalF32: 2006 case NVPTX::StoreRetvalF64: 2007 case NVPTX::StoreRetvalI16: 2008 case NVPTX::StoreRetvalI32: 2009 case NVPTX::StoreRetvalI64: 2010 case NVPTX::StoreRetvalI8: 2011 case NVPTX::LastCallArgF32: 2012 case NVPTX::LastCallArgF64: 2013 case NVPTX::LastCallArgI16: 2014 case NVPTX::LastCallArgI32: 2015 case NVPTX::LastCallArgI32imm: 2016 case NVPTX::LastCallArgI64: 2017 case NVPTX::LastCallArgParam: 2018 case NVPTX::LoadParamMemF32: 2019 case NVPTX::LoadParamMemF64: 2020 case NVPTX::LoadParamMemI16: 2021 case NVPTX::LoadParamMemI32: 2022 case NVPTX::LoadParamMemI64: 2023 case NVPTX::LoadParamMemI8: 2024 case NVPTX::PrototypeInst: 2025 case NVPTX::DBG_VALUE: 2026 return true; 2027 } 2028 return false; 2029 } 2030 2031 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly 2032 /// a copy from AsmPrinter::lowerConstant, except customized to only handle 2033 /// expressions that are representable in PTX and create 2034 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 2035 const MCExpr * 2036 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 2037 MCContext &Ctx = OutContext; 2038 2039 if (CV->isNullValue() || isa<UndefValue>(CV)) 2040 return MCConstantExpr::create(0, Ctx); 2041 2042 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 2043 return MCConstantExpr::create(CI->getZExtValue(), Ctx); 2044 2045 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 2046 const MCSymbolRefExpr *Expr = 2047 MCSymbolRefExpr::create(getSymbol(GV), Ctx); 2048 if (ProcessingGeneric) { 2049 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 2050 } else { 2051 return Expr; 2052 } 2053 } 2054 2055 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 2056 if (!CE) { 2057 llvm_unreachable("Unknown constant value to lower!"); 2058 } 2059 2060 switch (CE->getOpcode()) { 2061 default: 2062 // If the code isn't optimized, there may be outstanding folding 2063 // opportunities. Attempt to fold the expression using DataLayout as a 2064 // last resort before giving up. 2065 if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) 2066 if (C != CE) 2067 return lowerConstantForGV(C, ProcessingGeneric); 2068 2069 // Otherwise report the problem to the user. 2070 { 2071 std::string S; 2072 raw_string_ostream OS(S); 2073 OS << "Unsupported expression in static initializer: "; 2074 CE->printAsOperand(OS, /*PrintType=*/false, 2075 !MF ? nullptr : MF->getFunction()->getParent()); 2076 report_fatal_error(OS.str()); 2077 } 2078 2079 case Instruction::AddrSpaceCast: { 2080 // Strip the addrspacecast and pass along the operand 2081 PointerType *DstTy = cast<PointerType>(CE->getType()); 2082 if (DstTy->getAddressSpace() == 0) { 2083 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 2084 } 2085 std::string S; 2086 raw_string_ostream OS(S); 2087 OS << "Unsupported expression in static initializer: "; 2088 CE->printAsOperand(OS, /*PrintType=*/ false, 2089 !MF ? 0 : MF->getFunction()->getParent()); 2090 report_fatal_error(OS.str()); 2091 } 2092 2093 case Instruction::GetElementPtr: { 2094 const DataLayout &DL = getDataLayout(); 2095 2096 // Generate a symbolic expression for the byte address 2097 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 2098 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 2099 2100 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 2101 ProcessingGeneric); 2102 if (!OffsetAI) 2103 return Base; 2104 2105 int64_t Offset = OffsetAI.getSExtValue(); 2106 return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 2107 Ctx); 2108 } 2109 2110 case Instruction::Trunc: 2111 // We emit the value and depend on the assembler to truncate the generated 2112 // expression properly. This is important for differences between 2113 // blockaddress labels. Since the two labels are in the same function, it 2114 // is reasonable to treat their delta as a 32-bit value. 2115 // FALL THROUGH. 2116 case Instruction::BitCast: 2117 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2118 2119 case Instruction::IntToPtr: { 2120 const DataLayout &DL = getDataLayout(); 2121 2122 // Handle casts to pointers by changing them into casts to the appropriate 2123 // integer type. This promotes constant folding and simplifies this code. 2124 Constant *Op = CE->getOperand(0); 2125 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), 2126 false/*ZExt*/); 2127 return lowerConstantForGV(Op, ProcessingGeneric); 2128 } 2129 2130 case Instruction::PtrToInt: { 2131 const DataLayout &DL = getDataLayout(); 2132 2133 // Support only foldable casts to/from pointers that can be eliminated by 2134 // changing the pointer to the appropriately sized integer type. 2135 Constant *Op = CE->getOperand(0); 2136 Type *Ty = CE->getType(); 2137 2138 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 2139 2140 // We can emit the pointer value into this slot if the slot is an 2141 // integer slot equal to the size of the pointer. 2142 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 2143 return OpExpr; 2144 2145 // Otherwise the pointer is smaller than the resultant integer, mask off 2146 // the high bits so we are sure to get a proper truncation if the input is 2147 // a constant expr. 2148 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 2149 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 2150 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 2151 } 2152 2153 // The MC library also has a right-shift operator, but it isn't consistently 2154 // signed or unsigned between different targets. 2155 case Instruction::Add: { 2156 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2157 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 2158 switch (CE->getOpcode()) { 2159 default: llvm_unreachable("Unknown binary operator constant cast expr"); 2160 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 2161 } 2162 } 2163 } 2164 } 2165 2166 // Copy of MCExpr::print customized for NVPTX 2167 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 2168 switch (Expr.getKind()) { 2169 case MCExpr::Target: 2170 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 2171 case MCExpr::Constant: 2172 OS << cast<MCConstantExpr>(Expr).getValue(); 2173 return; 2174 2175 case MCExpr::SymbolRef: { 2176 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 2177 const MCSymbol &Sym = SRE.getSymbol(); 2178 Sym.print(OS, MAI); 2179 return; 2180 } 2181 2182 case MCExpr::Unary: { 2183 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 2184 switch (UE.getOpcode()) { 2185 case MCUnaryExpr::LNot: OS << '!'; break; 2186 case MCUnaryExpr::Minus: OS << '-'; break; 2187 case MCUnaryExpr::Not: OS << '~'; break; 2188 case MCUnaryExpr::Plus: OS << '+'; break; 2189 } 2190 printMCExpr(*UE.getSubExpr(), OS); 2191 return; 2192 } 2193 2194 case MCExpr::Binary: { 2195 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 2196 2197 // Only print parens around the LHS if it is non-trivial. 2198 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 2199 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 2200 printMCExpr(*BE.getLHS(), OS); 2201 } else { 2202 OS << '('; 2203 printMCExpr(*BE.getLHS(), OS); 2204 OS<< ')'; 2205 } 2206 2207 switch (BE.getOpcode()) { 2208 case MCBinaryExpr::Add: 2209 // Print "X-42" instead of "X+-42". 2210 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 2211 if (RHSC->getValue() < 0) { 2212 OS << RHSC->getValue(); 2213 return; 2214 } 2215 } 2216 2217 OS << '+'; 2218 break; 2219 default: llvm_unreachable("Unhandled binary operator"); 2220 } 2221 2222 // Only print parens around the LHS if it is non-trivial. 2223 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 2224 printMCExpr(*BE.getRHS(), OS); 2225 } else { 2226 OS << '('; 2227 printMCExpr(*BE.getRHS(), OS); 2228 OS << ')'; 2229 } 2230 return; 2231 } 2232 } 2233 2234 llvm_unreachable("Invalid expression kind!"); 2235 } 2236 2237 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2238 /// 2239 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2240 unsigned AsmVariant, 2241 const char *ExtraCode, raw_ostream &O) { 2242 if (ExtraCode && ExtraCode[0]) { 2243 if (ExtraCode[1] != 0) 2244 return true; // Unknown modifier. 2245 2246 switch (ExtraCode[0]) { 2247 default: 2248 // See if this is a generic print operand 2249 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2250 case 'r': 2251 break; 2252 } 2253 } 2254 2255 printOperand(MI, OpNo, O); 2256 2257 return false; 2258 } 2259 2260 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2261 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2262 const char *ExtraCode, raw_ostream &O) { 2263 if (ExtraCode && ExtraCode[0]) 2264 return true; // Unknown modifier 2265 2266 O << '['; 2267 printMemOperand(MI, OpNo, O); 2268 O << ']'; 2269 2270 return false; 2271 } 2272 2273 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2274 raw_ostream &O, const char *Modifier) { 2275 const MachineOperand &MO = MI->getOperand(opNum); 2276 switch (MO.getType()) { 2277 case MachineOperand::MO_Register: 2278 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2279 if (MO.getReg() == NVPTX::VRDepot) 2280 O << DEPOTNAME << getFunctionNumber(); 2281 else 2282 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2283 } else { 2284 emitVirtualRegister(MO.getReg(), O); 2285 } 2286 return; 2287 2288 case MachineOperand::MO_Immediate: 2289 if (!Modifier) 2290 O << MO.getImm(); 2291 else if (strstr(Modifier, "vec") == Modifier) 2292 printVecModifiedImmediate(MO, Modifier, O); 2293 else 2294 llvm_unreachable( 2295 "Don't know how to handle modifier on immediate operand"); 2296 return; 2297 2298 case MachineOperand::MO_FPImmediate: 2299 printFPConstant(MO.getFPImm(), O); 2300 break; 2301 2302 case MachineOperand::MO_GlobalAddress: 2303 getSymbol(MO.getGlobal())->print(O, MAI); 2304 break; 2305 2306 case MachineOperand::MO_MachineBasicBlock: 2307 MO.getMBB()->getSymbol()->print(O, MAI); 2308 return; 2309 2310 default: 2311 llvm_unreachable("Operand type not supported."); 2312 } 2313 } 2314 2315 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2316 raw_ostream &O, const char *Modifier) { 2317 printOperand(MI, opNum, O); 2318 2319 if (Modifier && !strcmp(Modifier, "add")) { 2320 O << ", "; 2321 printOperand(MI, opNum + 1, O); 2322 } else { 2323 if (MI->getOperand(opNum + 1).isImm() && 2324 MI->getOperand(opNum + 1).getImm() == 0) 2325 return; // don't print ',0' or '+0' 2326 O << "+"; 2327 printOperand(MI, opNum + 1, O); 2328 } 2329 } 2330 2331 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2332 std::stringstream temp; 2333 LineReader *reader = this->getReader(filename); 2334 temp << "\n//"; 2335 temp << filename.str(); 2336 temp << ":"; 2337 temp << line; 2338 temp << " "; 2339 temp << reader->readLine(line); 2340 temp << "\n"; 2341 this->OutStreamer->EmitRawText(temp.str()); 2342 } 2343 2344 LineReader *NVPTXAsmPrinter::getReader(const std::string &filename) { 2345 if (!reader) { 2346 reader = new LineReader(filename); 2347 } 2348 2349 if (reader->fileName() != filename) { 2350 delete reader; 2351 reader = new LineReader(filename); 2352 } 2353 2354 return reader; 2355 } 2356 2357 std::string LineReader::readLine(unsigned lineNum) { 2358 if (lineNum < theCurLine) { 2359 theCurLine = 0; 2360 fstr.seekg(0, std::ios::beg); 2361 } 2362 while (theCurLine < lineNum) { 2363 fstr.getline(buff, 500); 2364 theCurLine++; 2365 } 2366 return buff; 2367 } 2368 2369 // Force static initialization. 2370 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2371 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2372 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2373 } 2374