1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a printer that converts from our internal representation 11 // of machine-dependent LLVM code to NVPTX assembly language. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "NVPTXAsmPrinter.h" 16 #include "InstPrinter/NVPTXInstPrinter.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "NVPTX.h" 19 #include "NVPTXInstrInfo.h" 20 #include "NVPTXMCExpr.h" 21 #include "NVPTXMachineFunctionInfo.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXTargetMachine.h" 24 #include "NVPTXUtilities.h" 25 #include "cl_common_defines.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/Analysis/ConstantFolding.h" 28 #include "llvm/CodeGen/Analysis.h" 29 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineLoopInfo.h" 31 #include "llvm/CodeGen/MachineModuleInfo.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/IR/DebugInfo.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/Function.h" 36 #include "llvm/IR/GlobalVariable.h" 37 #include "llvm/IR/Mangler.h" 38 #include "llvm/IR/Module.h" 39 #include "llvm/IR/Operator.h" 40 #include "llvm/MC/MCInst.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/CommandLine.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/FormattedStream.h" 46 #include "llvm/Support/Path.h" 47 #include "llvm/Support/TargetRegistry.h" 48 #include "llvm/Support/TimeValue.h" 49 #include "llvm/Target/TargetLoweringObjectFile.h" 50 #include "llvm/Transforms/Utils/UnrollLoop.h" 51 #include <sstream> 52 using namespace llvm; 53 54 #define DEPOTNAME "__local_depot" 55 56 static cl::opt<bool> 57 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 58 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 59 cl::init(true)); 60 61 static cl::opt<bool> 62 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 63 cl::desc("NVPTX Specific: Emit source line in ptx file"), 64 cl::init(false)); 65 66 namespace { 67 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 68 /// depends. 69 void DiscoverDependentGlobals(const Value *V, 70 DenseSet<const GlobalVariable *> &Globals) { 71 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 72 Globals.insert(GV); 73 else { 74 if (const User *U = dyn_cast<User>(V)) { 75 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 76 DiscoverDependentGlobals(U->getOperand(i), Globals); 77 } 78 } 79 } 80 } 81 82 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 83 /// instances to be emitted, but only after any dependents have been added 84 /// first. 85 void VisitGlobalVariableForEmission( 86 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 87 DenseSet<const GlobalVariable *> &Visited, 88 DenseSet<const GlobalVariable *> &Visiting) { 89 // Have we already visited this one? 90 if (Visited.count(GV)) 91 return; 92 93 // Do we have a circular dependency? 94 if (!Visiting.insert(GV).second) 95 report_fatal_error("Circular dependency found in global variable set"); 96 97 // Make sure we visit all dependents first 98 DenseSet<const GlobalVariable *> Others; 99 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100 DiscoverDependentGlobals(GV->getOperand(i), Others); 101 102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103 E = Others.end(); 104 I != E; ++I) 105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106 107 // Now we can visit ourself 108 Order.push_back(GV); 109 Visited.insert(GV); 110 Visiting.erase(GV); 111 } 112 } 113 114 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 115 if (!EmitLineNumbers) 116 return; 117 if (ignoreLoc(MI)) 118 return; 119 120 DebugLoc curLoc = MI.getDebugLoc(); 121 122 if (!prevDebugLoc && !curLoc) 123 return; 124 125 if (prevDebugLoc == curLoc) 126 return; 127 128 prevDebugLoc = curLoc; 129 130 if (!curLoc) 131 return; 132 133 auto *Scope = cast_or_null<DIScope>(curLoc.getScope()); 134 if (!Scope) 135 return; 136 137 StringRef fileName(Scope->getFilename()); 138 StringRef dirName(Scope->getDirectory()); 139 SmallString<128> FullPathName = dirName; 140 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 141 sys::path::append(FullPathName, fileName); 142 fileName = FullPathName; 143 } 144 145 if (filenameMap.find(fileName) == filenameMap.end()) 146 return; 147 148 // Emit the line from the source file. 149 if (InterleaveSrc) 150 this->emitSrcInText(fileName, curLoc.getLine()); 151 152 std::stringstream temp; 153 temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() 154 << " " << curLoc.getCol(); 155 OutStreamer->EmitRawText(temp.str()); 156 } 157 158 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 159 SmallString<128> Str; 160 raw_svector_ostream OS(Str); 161 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) 162 emitLineNumberAsDotLoc(*MI); 163 164 MCInst Inst; 165 lowerToMCInst(MI, Inst); 166 EmitToStreamer(*OutStreamer, Inst); 167 } 168 169 // Handle symbol backtracking for targets that do not support image handles 170 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 171 unsigned OpNo, MCOperand &MCOp) { 172 const MachineOperand &MO = MI->getOperand(OpNo); 173 const MCInstrDesc &MCID = MI->getDesc(); 174 175 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 176 // This is a texture fetch, so operand 4 is a texref and operand 5 is 177 // a samplerref 178 if (OpNo == 4 && MO.isImm()) { 179 lowerImageHandleSymbol(MO.getImm(), MCOp); 180 return true; 181 } 182 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 183 lowerImageHandleSymbol(MO.getImm(), MCOp); 184 return true; 185 } 186 187 return false; 188 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 189 unsigned VecSize = 190 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 191 192 // For a surface load of vector size N, the Nth operand will be the surfref 193 if (OpNo == VecSize && MO.isImm()) { 194 lowerImageHandleSymbol(MO.getImm(), MCOp); 195 return true; 196 } 197 198 return false; 199 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 200 // This is a surface store, so operand 0 is a surfref 201 if (OpNo == 0 && MO.isImm()) { 202 lowerImageHandleSymbol(MO.getImm(), MCOp); 203 return true; 204 } 205 206 return false; 207 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 208 // This is a query, so operand 1 is a surfref/texref 209 if (OpNo == 1 && MO.isImm()) { 210 lowerImageHandleSymbol(MO.getImm(), MCOp); 211 return true; 212 } 213 214 return false; 215 } 216 217 return false; 218 } 219 220 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 221 // Ewwww 222 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 223 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 224 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 225 const char *Sym = MFI->getImageHandleSymbol(Index); 226 std::string *SymNamePtr = 227 nvTM.getManagedStrPool()->getManagedString(Sym); 228 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol( 229 StringRef(SymNamePtr->c_str()))); 230 } 231 232 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 233 OutMI.setOpcode(MI->getOpcode()); 234 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 235 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 236 const MachineOperand &MO = MI->getOperand(0); 237 OutMI.addOperand(GetSymbolRef( 238 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 239 return; 240 } 241 242 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 243 const MachineOperand &MO = MI->getOperand(i); 244 245 MCOperand MCOp; 246 if (!nvptxSubtarget->hasImageHandles()) { 247 if (lowerImageHandleOperand(MI, i, MCOp)) { 248 OutMI.addOperand(MCOp); 249 continue; 250 } 251 } 252 253 if (lowerOperand(MO, MCOp)) 254 OutMI.addOperand(MCOp); 255 } 256 } 257 258 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 259 MCOperand &MCOp) { 260 switch (MO.getType()) { 261 default: llvm_unreachable("unknown operand type"); 262 case MachineOperand::MO_Register: 263 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 264 break; 265 case MachineOperand::MO_Immediate: 266 MCOp = MCOperand::createImm(MO.getImm()); 267 break; 268 case MachineOperand::MO_MachineBasicBlock: 269 MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 270 MO.getMBB()->getSymbol(), OutContext)); 271 break; 272 case MachineOperand::MO_ExternalSymbol: 273 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 274 break; 275 case MachineOperand::MO_GlobalAddress: 276 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 277 break; 278 case MachineOperand::MO_FPImmediate: { 279 const ConstantFP *Cnt = MO.getFPImm(); 280 APFloat Val = Cnt->getValueAPF(); 281 282 switch (Cnt->getType()->getTypeID()) { 283 default: report_fatal_error("Unsupported FP type"); break; 284 case Type::FloatTyID: 285 MCOp = MCOperand::createExpr( 286 NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 287 break; 288 case Type::DoubleTyID: 289 MCOp = MCOperand::createExpr( 290 NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 291 break; 292 } 293 break; 294 } 295 } 296 return true; 297 } 298 299 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 300 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 301 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 302 303 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 304 unsigned RegNum = RegMap[Reg]; 305 306 // Encode the register class in the upper 4 bits 307 // Must be kept in sync with NVPTXInstPrinter::printRegName 308 unsigned Ret = 0; 309 if (RC == &NVPTX::Int1RegsRegClass) { 310 Ret = (1 << 28); 311 } else if (RC == &NVPTX::Int16RegsRegClass) { 312 Ret = (2 << 28); 313 } else if (RC == &NVPTX::Int32RegsRegClass) { 314 Ret = (3 << 28); 315 } else if (RC == &NVPTX::Int64RegsRegClass) { 316 Ret = (4 << 28); 317 } else if (RC == &NVPTX::Float32RegsRegClass) { 318 Ret = (5 << 28); 319 } else if (RC == &NVPTX::Float64RegsRegClass) { 320 Ret = (6 << 28); 321 } else { 322 report_fatal_error("Bad register class"); 323 } 324 325 // Insert the vreg number 326 Ret |= (RegNum & 0x0FFFFFFF); 327 return Ret; 328 } else { 329 // Some special-use registers are actually physical registers. 330 // Encode this as the register class ID of 0 and the real register ID. 331 return Reg & 0x0FFFFFFF; 332 } 333 } 334 335 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 336 const MCExpr *Expr; 337 Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 338 OutContext); 339 return MCOperand::createExpr(Expr); 340 } 341 342 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 343 const DataLayout &DL = getDataLayout(); 344 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 345 346 Type *Ty = F->getReturnType(); 347 348 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 349 350 if (Ty->getTypeID() == Type::VoidTyID) 351 return; 352 353 O << " ("; 354 355 if (isABI) { 356 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 357 unsigned size = 0; 358 if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 359 size = ITy->getBitWidth(); 360 if (size < 32) 361 size = 32; 362 } else { 363 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 364 size = Ty->getPrimitiveSizeInBits(); 365 } 366 367 O << ".param .b" << size << " func_retval0"; 368 } else if (isa<PointerType>(Ty)) { 369 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 370 << " func_retval0"; 371 } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 372 unsigned totalsz = DL.getTypeAllocSize(Ty); 373 unsigned retAlignment = 0; 374 if (!llvm::getAlign(*F, 0, retAlignment)) 375 retAlignment = DL.getABITypeAlignment(Ty); 376 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 377 << "]"; 378 } else 379 llvm_unreachable("Unknown return type"); 380 } else { 381 SmallVector<EVT, 16> vtparts; 382 ComputeValueVTs(*TLI, DL, Ty, vtparts); 383 unsigned idx = 0; 384 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 385 unsigned elems = 1; 386 EVT elemtype = vtparts[i]; 387 if (vtparts[i].isVector()) { 388 elems = vtparts[i].getVectorNumElements(); 389 elemtype = vtparts[i].getVectorElementType(); 390 } 391 392 for (unsigned j = 0, je = elems; j != je; ++j) { 393 unsigned sz = elemtype.getSizeInBits(); 394 if (elemtype.isInteger() && (sz < 32)) 395 sz = 32; 396 O << ".reg .b" << sz << " func_retval" << idx; 397 if (j < je - 1) 398 O << ", "; 399 ++idx; 400 } 401 if (i < e - 1) 402 O << ", "; 403 } 404 } 405 O << ") "; 406 return; 407 } 408 409 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 410 raw_ostream &O) { 411 const Function *F = MF.getFunction(); 412 printReturnValStr(F, O); 413 } 414 415 // Return true if MBB is the header of a loop marked with 416 // llvm.loop.unroll.disable. 417 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 418 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 419 const MachineBasicBlock &MBB) const { 420 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 421 // We insert .pragma "nounroll" only to the loop header. 422 if (!LI.isLoopHeader(&MBB)) 423 return false; 424 425 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, 426 // we iterate through each back edge of the loop with header MBB, and check 427 // whether its metadata contains llvm.loop.unroll.disable. 428 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 429 const MachineBasicBlock *PMBB = *I; 430 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 431 // Edges from other loops to MBB are not back edges. 432 continue; 433 } 434 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 435 if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) { 436 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 437 return true; 438 } 439 } 440 } 441 return false; 442 } 443 444 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { 445 AsmPrinter::EmitBasicBlockStart(MBB); 446 if (isLoopHeaderOfNoUnroll(MBB)) 447 OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); 448 } 449 450 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 451 SmallString<128> Str; 452 raw_svector_ostream O(Str); 453 454 if (!GlobalsEmitted) { 455 emitGlobals(*MF->getFunction()->getParent()); 456 GlobalsEmitted = true; 457 } 458 459 // Set up 460 MRI = &MF->getRegInfo(); 461 F = MF->getFunction(); 462 emitLinkageDirective(F, O); 463 if (llvm::isKernelFunction(*F)) 464 O << ".entry "; 465 else { 466 O << ".func "; 467 printReturnValStr(*MF, O); 468 } 469 470 CurrentFnSym->print(O, MAI); 471 472 emitFunctionParamList(*MF, O); 473 474 if (llvm::isKernelFunction(*F)) 475 emitKernelFunctionDirectives(*F, O); 476 477 OutStreamer->EmitRawText(O.str()); 478 479 prevDebugLoc = DebugLoc(); 480 } 481 482 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 483 VRegMapping.clear(); 484 OutStreamer->EmitRawText(StringRef("{\n")); 485 setAndEmitFunctionVirtualRegisters(*MF); 486 487 SmallString<128> Str; 488 raw_svector_ostream O(Str); 489 emitDemotedVars(MF->getFunction(), O); 490 OutStreamer->EmitRawText(O.str()); 491 } 492 493 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 494 OutStreamer->EmitRawText(StringRef("}\n")); 495 VRegMapping.clear(); 496 } 497 498 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 499 unsigned RegNo = MI->getOperand(0).getReg(); 500 if (TargetRegisterInfo::isVirtualRegister(RegNo)) { 501 OutStreamer->AddComment(Twine("implicit-def: ") + 502 getVirtualRegisterName(RegNo)); 503 } else { 504 OutStreamer->AddComment(Twine("implicit-def: ") + 505 nvptxSubtarget->getRegisterInfo()->getName(RegNo)); 506 } 507 OutStreamer->AddBlankLine(); 508 } 509 510 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 511 raw_ostream &O) const { 512 // If the NVVM IR has some of reqntid* specified, then output 513 // the reqntid directive, and set the unspecified ones to 1. 514 // If none of reqntid* is specified, don't output reqntid directive. 515 unsigned reqntidx, reqntidy, reqntidz; 516 bool specified = false; 517 if (!llvm::getReqNTIDx(F, reqntidx)) 518 reqntidx = 1; 519 else 520 specified = true; 521 if (!llvm::getReqNTIDy(F, reqntidy)) 522 reqntidy = 1; 523 else 524 specified = true; 525 if (!llvm::getReqNTIDz(F, reqntidz)) 526 reqntidz = 1; 527 else 528 specified = true; 529 530 if (specified) 531 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 532 << "\n"; 533 534 // If the NVVM IR has some of maxntid* specified, then output 535 // the maxntid directive, and set the unspecified ones to 1. 536 // If none of maxntid* is specified, don't output maxntid directive. 537 unsigned maxntidx, maxntidy, maxntidz; 538 specified = false; 539 if (!llvm::getMaxNTIDx(F, maxntidx)) 540 maxntidx = 1; 541 else 542 specified = true; 543 if (!llvm::getMaxNTIDy(F, maxntidy)) 544 maxntidy = 1; 545 else 546 specified = true; 547 if (!llvm::getMaxNTIDz(F, maxntidz)) 548 maxntidz = 1; 549 else 550 specified = true; 551 552 if (specified) 553 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 554 << "\n"; 555 556 unsigned mincta; 557 if (llvm::getMinCTASm(F, mincta)) 558 O << ".minnctapersm " << mincta << "\n"; 559 } 560 561 std::string 562 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 563 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 564 565 std::string Name; 566 raw_string_ostream NameStr(Name); 567 568 VRegRCMap::const_iterator I = VRegMapping.find(RC); 569 assert(I != VRegMapping.end() && "Bad register class"); 570 const DenseMap<unsigned, unsigned> &RegMap = I->second; 571 572 VRegMap::const_iterator VI = RegMap.find(Reg); 573 assert(VI != RegMap.end() && "Bad virtual register"); 574 unsigned MappedVR = VI->second; 575 576 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 577 578 NameStr.flush(); 579 return Name; 580 } 581 582 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 583 raw_ostream &O) { 584 O << getVirtualRegisterName(vr); 585 } 586 587 void NVPTXAsmPrinter::printVecModifiedImmediate( 588 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 589 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 590 int Imm = (int) MO.getImm(); 591 if (0 == strcmp(Modifier, "vecelem")) 592 O << "_" << vecelem[Imm]; 593 else if (0 == strcmp(Modifier, "vecv4comm1")) { 594 if ((Imm < 0) || (Imm > 3)) 595 O << "//"; 596 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 597 if ((Imm < 4) || (Imm > 7)) 598 O << "//"; 599 } else if (0 == strcmp(Modifier, "vecv4pos")) { 600 if (Imm < 0) 601 Imm = 0; 602 O << "_" << vecelem[Imm % 4]; 603 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 604 if ((Imm < 0) || (Imm > 1)) 605 O << "//"; 606 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 607 if ((Imm < 2) || (Imm > 3)) 608 O << "//"; 609 } else if (0 == strcmp(Modifier, "vecv2pos")) { 610 if (Imm < 0) 611 Imm = 0; 612 O << "_" << vecelem[Imm % 2]; 613 } else 614 llvm_unreachable("Unknown Modifier on immediate operand"); 615 } 616 617 618 619 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 620 621 emitLinkageDirective(F, O); 622 if (llvm::isKernelFunction(*F)) 623 O << ".entry "; 624 else 625 O << ".func "; 626 printReturnValStr(F, O); 627 getSymbol(F)->print(O, MAI); 628 O << "\n"; 629 emitFunctionParamList(F, O); 630 O << ";\n"; 631 } 632 633 static bool usedInGlobalVarDef(const Constant *C) { 634 if (!C) 635 return false; 636 637 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 638 return GV->getName() != "llvm.used"; 639 } 640 641 for (const User *U : C->users()) 642 if (const Constant *C = dyn_cast<Constant>(U)) 643 if (usedInGlobalVarDef(C)) 644 return true; 645 646 return false; 647 } 648 649 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 650 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 651 if (othergv->getName() == "llvm.used") 652 return true; 653 } 654 655 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 656 if (instr->getParent() && instr->getParent()->getParent()) { 657 const Function *curFunc = instr->getParent()->getParent(); 658 if (oneFunc && (curFunc != oneFunc)) 659 return false; 660 oneFunc = curFunc; 661 return true; 662 } else 663 return false; 664 } 665 666 for (const User *UU : U->users()) 667 if (!usedInOneFunc(UU, oneFunc)) 668 return false; 669 670 return true; 671 } 672 673 /* Find out if a global variable can be demoted to local scope. 674 * Currently, this is valid for CUDA shared variables, which have local 675 * scope and global lifetime. So the conditions to check are : 676 * 1. Is the global variable in shared address space? 677 * 2. Does it have internal linkage? 678 * 3. Is the global variable referenced only in one function? 679 */ 680 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 681 if (!gv->hasInternalLinkage()) 682 return false; 683 PointerType *Pty = gv->getType(); 684 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 685 return false; 686 687 const Function *oneFunc = nullptr; 688 689 bool flag = usedInOneFunc(gv, oneFunc); 690 if (!flag) 691 return false; 692 if (!oneFunc) 693 return false; 694 f = oneFunc; 695 return true; 696 } 697 698 static bool useFuncSeen(const Constant *C, 699 llvm::DenseMap<const Function *, bool> &seenMap) { 700 for (const User *U : C->users()) { 701 if (const Constant *cu = dyn_cast<Constant>(U)) { 702 if (useFuncSeen(cu, seenMap)) 703 return true; 704 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 705 const BasicBlock *bb = I->getParent(); 706 if (!bb) 707 continue; 708 const Function *caller = bb->getParent(); 709 if (!caller) 710 continue; 711 if (seenMap.find(caller) != seenMap.end()) 712 return true; 713 } 714 } 715 return false; 716 } 717 718 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 719 llvm::DenseMap<const Function *, bool> seenMap; 720 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 721 const Function *F = &*FI; 722 723 if (F->isDeclaration()) { 724 if (F->use_empty()) 725 continue; 726 if (F->getIntrinsicID()) 727 continue; 728 emitDeclaration(F, O); 729 continue; 730 } 731 for (const User *U : F->users()) { 732 if (const Constant *C = dyn_cast<Constant>(U)) { 733 if (usedInGlobalVarDef(C)) { 734 // The use is in the initialization of a global variable 735 // that is a function pointer, so print a declaration 736 // for the original function 737 emitDeclaration(F, O); 738 break; 739 } 740 // Emit a declaration of this function if the function that 741 // uses this constant expr has already been seen. 742 if (useFuncSeen(C, seenMap)) { 743 emitDeclaration(F, O); 744 break; 745 } 746 } 747 748 if (!isa<Instruction>(U)) 749 continue; 750 const Instruction *instr = cast<Instruction>(U); 751 const BasicBlock *bb = instr->getParent(); 752 if (!bb) 753 continue; 754 const Function *caller = bb->getParent(); 755 if (!caller) 756 continue; 757 758 // If a caller has already been seen, then the caller is 759 // appearing in the module before the callee. so print out 760 // a declaration for the callee. 761 if (seenMap.find(caller) != seenMap.end()) { 762 emitDeclaration(F, O); 763 break; 764 } 765 } 766 seenMap[F] = true; 767 } 768 } 769 770 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 771 DebugInfoFinder DbgFinder; 772 DbgFinder.processModule(M); 773 774 unsigned i = 1; 775 for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) { 776 StringRef Filename = DIUnit->getFilename(); 777 StringRef Dirname = DIUnit->getDirectory(); 778 SmallString<128> FullPathName = Dirname; 779 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 780 sys::path::append(FullPathName, Filename); 781 Filename = FullPathName; 782 } 783 if (filenameMap.find(Filename) != filenameMap.end()) 784 continue; 785 filenameMap[Filename] = i; 786 OutStreamer->EmitDwarfFileDirective(i, "", Filename); 787 ++i; 788 } 789 790 for (DISubprogram *SP : DbgFinder.subprograms()) { 791 StringRef Filename = SP->getFilename(); 792 StringRef Dirname = SP->getDirectory(); 793 SmallString<128> FullPathName = Dirname; 794 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 795 sys::path::append(FullPathName, Filename); 796 Filename = FullPathName; 797 } 798 if (filenameMap.find(Filename) != filenameMap.end()) 799 continue; 800 filenameMap[Filename] = i; 801 ++i; 802 } 803 } 804 805 bool NVPTXAsmPrinter::doInitialization(Module &M) { 806 // Construct a default subtarget off of the TargetMachine defaults. The 807 // rest of NVPTX isn't friendly to change subtargets per function and 808 // so the default TargetMachine will have all of the options. 809 const Triple &TT = TM.getTargetTriple(); 810 StringRef CPU = TM.getTargetCPU(); 811 StringRef FS = TM.getTargetFeatureString(); 812 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 813 const NVPTXSubtarget STI(TT, CPU, FS, NTM); 814 815 SmallString<128> Str1; 816 raw_svector_ostream OS1(Str1); 817 818 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 819 820 // We need to call the parent's one explicitly. 821 //bool Result = AsmPrinter::doInitialization(M); 822 823 // Initialize TargetLoweringObjectFile. 824 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 825 .Initialize(OutContext, TM); 826 827 Mang = new Mangler(); 828 829 // Emit header before any dwarf directives are emitted below. 830 emitHeader(M, OS1, STI); 831 OutStreamer->EmitRawText(OS1.str()); 832 833 // Already commented out 834 //bool Result = AsmPrinter::doInitialization(M); 835 836 // Emit module-level inline asm if it exists. 837 if (!M.getModuleInlineAsm().empty()) { 838 OutStreamer->AddComment("Start of file scope inline assembly"); 839 OutStreamer->AddBlankLine(); 840 OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm())); 841 OutStreamer->AddBlankLine(); 842 OutStreamer->AddComment("End of file scope inline assembly"); 843 OutStreamer->AddBlankLine(); 844 } 845 846 // If we're not NVCL we're CUDA, go ahead and emit filenames. 847 if (TM.getTargetTriple().getOS() != Triple::NVCL) 848 recordAndEmitFilenames(M); 849 850 GlobalsEmitted = false; 851 852 return false; // success 853 } 854 855 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 856 SmallString<128> Str2; 857 raw_svector_ostream OS2(Str2); 858 859 emitDeclarations(M, OS2); 860 861 // As ptxas does not support forward references of globals, we need to first 862 // sort the list of module-level globals in def-use order. We visit each 863 // global variable in order, and ensure that we emit it *after* its dependent 864 // globals. We use a little extra memory maintaining both a set and a list to 865 // have fast searches while maintaining a strict ordering. 866 SmallVector<const GlobalVariable *, 8> Globals; 867 DenseSet<const GlobalVariable *> GVVisited; 868 DenseSet<const GlobalVariable *> GVVisiting; 869 870 // Visit each global variable, in order 871 for (const GlobalVariable &I : M.globals()) 872 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 873 874 assert(GVVisited.size() == M.getGlobalList().size() && 875 "Missed a global variable"); 876 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 877 878 // Print out module-level global variables in proper order 879 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 880 printModuleLevelGV(Globals[i], OS2); 881 882 OS2 << '\n'; 883 884 OutStreamer->EmitRawText(OS2.str()); 885 } 886 887 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 888 const NVPTXSubtarget &STI) { 889 O << "//\n"; 890 O << "// Generated by LLVM NVPTX Back-End\n"; 891 O << "//\n"; 892 O << "\n"; 893 894 unsigned PTXVersion = STI.getPTXVersion(); 895 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 896 897 O << ".target "; 898 O << STI.getTargetName(); 899 900 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 901 if (NTM.getDrvInterface() == NVPTX::NVCL) 902 O << ", texmode_independent"; 903 else { 904 if (!STI.hasDouble()) 905 O << ", map_f64_to_f32"; 906 } 907 908 if (MAI->doesSupportDebugInformation()) 909 O << ", debug"; 910 911 O << "\n"; 912 913 O << ".address_size "; 914 if (NTM.is64Bit()) 915 O << "64"; 916 else 917 O << "32"; 918 O << "\n"; 919 920 O << "\n"; 921 } 922 923 bool NVPTXAsmPrinter::doFinalization(Module &M) { 924 // If we did not emit any functions, then the global declarations have not 925 // yet been emitted. 926 if (!GlobalsEmitted) { 927 emitGlobals(M); 928 GlobalsEmitted = true; 929 } 930 931 // XXX Temproarily remove global variables so that doFinalization() will not 932 // emit them again (global variables are emitted at beginning). 933 934 Module::GlobalListType &global_list = M.getGlobalList(); 935 int i, n = global_list.size(); 936 GlobalVariable **gv_array = new GlobalVariable *[n]; 937 938 // first, back-up GlobalVariable in gv_array 939 i = 0; 940 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 941 I != E; ++I) 942 gv_array[i++] = &*I; 943 944 // second, empty global_list 945 while (!global_list.empty()) 946 global_list.remove(global_list.begin()); 947 948 // call doFinalization 949 bool ret = AsmPrinter::doFinalization(M); 950 951 // now we restore global variables 952 for (i = 0; i < n; i++) 953 global_list.insert(global_list.end(), gv_array[i]); 954 955 clearAnnotationCache(&M); 956 957 delete[] gv_array; 958 return ret; 959 960 //bool Result = AsmPrinter::doFinalization(M); 961 // Instead of calling the parents doFinalization, we may 962 // clone parents doFinalization and customize here. 963 // Currently, we if NVISA out the EmitGlobals() in 964 // parent's doFinalization, which is too intrusive. 965 // 966 // Same for the doInitialization. 967 //return Result; 968 } 969 970 // This function emits appropriate linkage directives for 971 // functions and global variables. 972 // 973 // extern function declaration -> .extern 974 // extern function definition -> .visible 975 // external global variable with init -> .visible 976 // external without init -> .extern 977 // appending -> not allowed, assert. 978 // for any linkage other than 979 // internal, private, linker_private, 980 // linker_private_weak, linker_private_weak_def_auto, 981 // we emit -> .weak. 982 983 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 984 raw_ostream &O) { 985 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 986 if (V->hasExternalLinkage()) { 987 if (isa<GlobalVariable>(V)) { 988 const GlobalVariable *GVar = cast<GlobalVariable>(V); 989 if (GVar) { 990 if (GVar->hasInitializer()) 991 O << ".visible "; 992 else 993 O << ".extern "; 994 } 995 } else if (V->isDeclaration()) 996 O << ".extern "; 997 else 998 O << ".visible "; 999 } else if (V->hasAppendingLinkage()) { 1000 std::string msg; 1001 msg.append("Error: "); 1002 msg.append("Symbol "); 1003 if (V->hasName()) 1004 msg.append(V->getName()); 1005 msg.append("has unsupported appending linkage type"); 1006 llvm_unreachable(msg.c_str()); 1007 } else if (!V->hasInternalLinkage() && 1008 !V->hasPrivateLinkage()) { 1009 O << ".weak "; 1010 } 1011 } 1012 } 1013 1014 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1015 raw_ostream &O, 1016 bool processDemoted) { 1017 1018 // Skip meta data 1019 if (GVar->hasSection()) { 1020 if (GVar->getSection() == StringRef("llvm.metadata")) 1021 return; 1022 } 1023 1024 // Skip LLVM intrinsic global variables 1025 if (GVar->getName().startswith("llvm.") || 1026 GVar->getName().startswith("nvvm.")) 1027 return; 1028 1029 const DataLayout &DL = getDataLayout(); 1030 1031 // GlobalVariables are always constant pointers themselves. 1032 PointerType *PTy = GVar->getType(); 1033 Type *ETy = PTy->getElementType(); 1034 1035 if (GVar->hasExternalLinkage()) { 1036 if (GVar->hasInitializer()) 1037 O << ".visible "; 1038 else 1039 O << ".extern "; 1040 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1041 GVar->hasAvailableExternallyLinkage() || 1042 GVar->hasCommonLinkage()) { 1043 O << ".weak "; 1044 } 1045 1046 if (llvm::isTexture(*GVar)) { 1047 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1048 return; 1049 } 1050 1051 if (llvm::isSurface(*GVar)) { 1052 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1053 return; 1054 } 1055 1056 if (GVar->isDeclaration()) { 1057 // (extern) declarations, no definition or initializer 1058 // Currently the only known declaration is for an automatic __local 1059 // (.shared) promoted to global. 1060 emitPTXGlobalVariable(GVar, O); 1061 O << ";\n"; 1062 return; 1063 } 1064 1065 if (llvm::isSampler(*GVar)) { 1066 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1067 1068 const Constant *Initializer = nullptr; 1069 if (GVar->hasInitializer()) 1070 Initializer = GVar->getInitializer(); 1071 const ConstantInt *CI = nullptr; 1072 if (Initializer) 1073 CI = dyn_cast<ConstantInt>(Initializer); 1074 if (CI) { 1075 unsigned sample = CI->getZExtValue(); 1076 1077 O << " = { "; 1078 1079 for (int i = 0, 1080 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1081 i < 3; i++) { 1082 O << "addr_mode_" << i << " = "; 1083 switch (addr) { 1084 case 0: 1085 O << "wrap"; 1086 break; 1087 case 1: 1088 O << "clamp_to_border"; 1089 break; 1090 case 2: 1091 O << "clamp_to_edge"; 1092 break; 1093 case 3: 1094 O << "wrap"; 1095 break; 1096 case 4: 1097 O << "mirror"; 1098 break; 1099 } 1100 O << ", "; 1101 } 1102 O << "filter_mode = "; 1103 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1104 case 0: 1105 O << "nearest"; 1106 break; 1107 case 1: 1108 O << "linear"; 1109 break; 1110 case 2: 1111 llvm_unreachable("Anisotropic filtering is not supported"); 1112 default: 1113 O << "nearest"; 1114 break; 1115 } 1116 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1117 O << ", force_unnormalized_coords = 1"; 1118 } 1119 O << " }"; 1120 } 1121 1122 O << ";\n"; 1123 return; 1124 } 1125 1126 if (GVar->hasPrivateLinkage()) { 1127 1128 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1129 return; 1130 1131 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1132 if (!strncmp(GVar->getName().data(), "filename", 8)) 1133 return; 1134 if (GVar->use_empty()) 1135 return; 1136 } 1137 1138 const Function *demotedFunc = nullptr; 1139 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1140 O << "// " << GVar->getName() << " has been demoted\n"; 1141 if (localDecls.find(demotedFunc) != localDecls.end()) 1142 localDecls[demotedFunc].push_back(GVar); 1143 else { 1144 std::vector<const GlobalVariable *> temp; 1145 temp.push_back(GVar); 1146 localDecls[demotedFunc] = temp; 1147 } 1148 return; 1149 } 1150 1151 O << "."; 1152 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1153 1154 if (isManaged(*GVar)) { 1155 O << " .attribute(.managed)"; 1156 } 1157 1158 if (GVar->getAlignment() == 0) 1159 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1160 else 1161 O << " .align " << GVar->getAlignment(); 1162 1163 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1164 O << " ."; 1165 // Special case: ABI requires that we use .u8 for predicates 1166 if (ETy->isIntegerTy(1)) 1167 O << "u8"; 1168 else 1169 O << getPTXFundamentalTypeStr(ETy, false); 1170 O << " "; 1171 getSymbol(GVar)->print(O, MAI); 1172 1173 // Ptx allows variable initilization only for constant and global state 1174 // spaces. 1175 if (GVar->hasInitializer()) { 1176 if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1177 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { 1178 const Constant *Initializer = GVar->getInitializer(); 1179 // 'undef' is treated as there is no value specified. 1180 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1181 O << " = "; 1182 printScalarConstant(Initializer, O); 1183 } 1184 } else { 1185 // The frontend adds zero-initializer to device and constant variables 1186 // that don't have an initial value, and UndefValue to shared 1187 // variables, so skip warning for this case. 1188 if (!GVar->getInitializer()->isNullValue() && 1189 !isa<UndefValue>(GVar->getInitializer())) { 1190 report_fatal_error("initial value of '" + GVar->getName() + 1191 "' is not allowed in addrspace(" + 1192 Twine(PTy->getAddressSpace()) + ")"); 1193 } 1194 } 1195 } 1196 } else { 1197 unsigned int ElementSize = 0; 1198 1199 // Although PTX has direct support for struct type and array type and 1200 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1201 // targets that support these high level field accesses. Structs, arrays 1202 // and vectors are lowered into arrays of bytes. 1203 switch (ETy->getTypeID()) { 1204 case Type::StructTyID: 1205 case Type::ArrayTyID: 1206 case Type::VectorTyID: 1207 ElementSize = DL.getTypeStoreSize(ETy); 1208 // Ptx allows variable initilization only for constant and 1209 // global state spaces. 1210 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1211 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1212 GVar->hasInitializer()) { 1213 const Constant *Initializer = GVar->getInitializer(); 1214 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1215 AggBuffer aggBuffer(ElementSize, O, *this); 1216 bufferAggregateConstant(Initializer, &aggBuffer); 1217 if (aggBuffer.numSymbols) { 1218 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1219 O << " .u64 "; 1220 getSymbol(GVar)->print(O, MAI); 1221 O << "["; 1222 O << ElementSize / 8; 1223 } else { 1224 O << " .u32 "; 1225 getSymbol(GVar)->print(O, MAI); 1226 O << "["; 1227 O << ElementSize / 4; 1228 } 1229 O << "]"; 1230 } else { 1231 O << " .b8 "; 1232 getSymbol(GVar)->print(O, MAI); 1233 O << "["; 1234 O << ElementSize; 1235 O << "]"; 1236 } 1237 O << " = {"; 1238 aggBuffer.print(); 1239 O << "}"; 1240 } else { 1241 O << " .b8 "; 1242 getSymbol(GVar)->print(O, MAI); 1243 if (ElementSize) { 1244 O << "["; 1245 O << ElementSize; 1246 O << "]"; 1247 } 1248 } 1249 } else { 1250 O << " .b8 "; 1251 getSymbol(GVar)->print(O, MAI); 1252 if (ElementSize) { 1253 O << "["; 1254 O << ElementSize; 1255 O << "]"; 1256 } 1257 } 1258 break; 1259 default: 1260 llvm_unreachable("type not supported yet"); 1261 } 1262 1263 } 1264 O << ";\n"; 1265 } 1266 1267 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1268 if (localDecls.find(f) == localDecls.end()) 1269 return; 1270 1271 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1272 1273 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1274 O << "\t// demoted variable\n\t"; 1275 printModuleLevelGV(gvars[i], O, true); 1276 } 1277 } 1278 1279 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1280 raw_ostream &O) const { 1281 switch (AddressSpace) { 1282 case llvm::ADDRESS_SPACE_LOCAL: 1283 O << "local"; 1284 break; 1285 case llvm::ADDRESS_SPACE_GLOBAL: 1286 O << "global"; 1287 break; 1288 case llvm::ADDRESS_SPACE_CONST: 1289 O << "const"; 1290 break; 1291 case llvm::ADDRESS_SPACE_SHARED: 1292 O << "shared"; 1293 break; 1294 default: 1295 report_fatal_error("Bad address space found while emitting PTX"); 1296 break; 1297 } 1298 } 1299 1300 std::string 1301 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 1302 switch (Ty->getTypeID()) { 1303 default: 1304 llvm_unreachable("unexpected type"); 1305 break; 1306 case Type::IntegerTyID: { 1307 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1308 if (NumBits == 1) 1309 return "pred"; 1310 else if (NumBits <= 64) { 1311 std::string name = "u"; 1312 return name + utostr(NumBits); 1313 } else { 1314 llvm_unreachable("Integer too large"); 1315 break; 1316 } 1317 break; 1318 } 1319 case Type::FloatTyID: 1320 return "f32"; 1321 case Type::DoubleTyID: 1322 return "f64"; 1323 case Type::PointerTyID: 1324 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1325 if (useB4PTR) 1326 return "b64"; 1327 else 1328 return "u64"; 1329 else if (useB4PTR) 1330 return "b32"; 1331 else 1332 return "u32"; 1333 } 1334 llvm_unreachable("unexpected type"); 1335 return nullptr; 1336 } 1337 1338 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1339 raw_ostream &O) { 1340 1341 const DataLayout &DL = getDataLayout(); 1342 1343 // GlobalVariables are always constant pointers themselves. 1344 PointerType *PTy = GVar->getType(); 1345 Type *ETy = PTy->getElementType(); 1346 1347 O << "."; 1348 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1349 if (GVar->getAlignment() == 0) 1350 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1351 else 1352 O << " .align " << GVar->getAlignment(); 1353 1354 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1355 O << " ."; 1356 O << getPTXFundamentalTypeStr(ETy); 1357 O << " "; 1358 getSymbol(GVar)->print(O, MAI); 1359 return; 1360 } 1361 1362 int64_t ElementSize = 0; 1363 1364 // Although PTX has direct support for struct type and array type and LLVM IR 1365 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1366 // support these high level field accesses. Structs and arrays are lowered 1367 // into arrays of bytes. 1368 switch (ETy->getTypeID()) { 1369 case Type::StructTyID: 1370 case Type::ArrayTyID: 1371 case Type::VectorTyID: 1372 ElementSize = DL.getTypeStoreSize(ETy); 1373 O << " .b8 "; 1374 getSymbol(GVar)->print(O, MAI); 1375 O << "["; 1376 if (ElementSize) { 1377 O << ElementSize; 1378 } 1379 O << "]"; 1380 break; 1381 default: 1382 llvm_unreachable("type not supported yet"); 1383 } 1384 return; 1385 } 1386 1387 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { 1388 if (Ty->isSingleValueType()) 1389 return DL.getPrefTypeAlignment(Ty); 1390 1391 auto *ATy = dyn_cast<ArrayType>(Ty); 1392 if (ATy) 1393 return getOpenCLAlignment(DL, ATy->getElementType()); 1394 1395 auto *STy = dyn_cast<StructType>(Ty); 1396 if (STy) { 1397 unsigned int alignStruct = 1; 1398 // Go through each element of the struct and find the 1399 // largest alignment. 1400 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1401 Type *ETy = STy->getElementType(i); 1402 unsigned int align = getOpenCLAlignment(DL, ETy); 1403 if (align > alignStruct) 1404 alignStruct = align; 1405 } 1406 return alignStruct; 1407 } 1408 1409 auto *FTy = dyn_cast<FunctionType>(Ty); 1410 if (FTy) 1411 return DL.getPointerPrefAlignment(); 1412 return DL.getPrefTypeAlignment(Ty); 1413 } 1414 1415 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1416 int paramIndex, raw_ostream &O) { 1417 getSymbol(I->getParent())->print(O, MAI); 1418 O << "_param_" << paramIndex; 1419 } 1420 1421 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1422 const DataLayout &DL = getDataLayout(); 1423 const AttributeSet &PAL = F->getAttributes(); 1424 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 1425 Function::const_arg_iterator I, E; 1426 unsigned paramIndex = 0; 1427 bool first = true; 1428 bool isKernelFunc = llvm::isKernelFunction(*F); 1429 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 1430 MVT thePointerTy = TLI->getPointerTy(DL); 1431 1432 O << "(\n"; 1433 1434 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1435 Type *Ty = I->getType(); 1436 1437 if (!first) 1438 O << ",\n"; 1439 1440 first = false; 1441 1442 // Handle image/sampler parameters 1443 if (isKernelFunction(*F)) { 1444 if (isSampler(*I) || isImage(*I)) { 1445 if (isImage(*I)) { 1446 std::string sname = I->getName(); 1447 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1448 if (nvptxSubtarget->hasImageHandles()) 1449 O << "\t.param .u64 .ptr .surfref "; 1450 else 1451 O << "\t.param .surfref "; 1452 CurrentFnSym->print(O, MAI); 1453 O << "_param_" << paramIndex; 1454 } 1455 else { // Default image is read_only 1456 if (nvptxSubtarget->hasImageHandles()) 1457 O << "\t.param .u64 .ptr .texref "; 1458 else 1459 O << "\t.param .texref "; 1460 CurrentFnSym->print(O, MAI); 1461 O << "_param_" << paramIndex; 1462 } 1463 } else { 1464 if (nvptxSubtarget->hasImageHandles()) 1465 O << "\t.param .u64 .ptr .samplerref "; 1466 else 1467 O << "\t.param .samplerref "; 1468 CurrentFnSym->print(O, MAI); 1469 O << "_param_" << paramIndex; 1470 } 1471 continue; 1472 } 1473 } 1474 1475 if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { 1476 if (Ty->isAggregateType() || Ty->isVectorTy()) { 1477 // Just print .param .align <a> .b8 .param[size]; 1478 // <a> = PAL.getparamalignment 1479 // size = typeallocsize of element type 1480 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1481 if (align == 0) 1482 align = DL.getABITypeAlignment(Ty); 1483 1484 unsigned sz = DL.getTypeAllocSize(Ty); 1485 O << "\t.param .align " << align << " .b8 "; 1486 printParamName(I, paramIndex, O); 1487 O << "[" << sz << "]"; 1488 1489 continue; 1490 } 1491 // Just a scalar 1492 auto *PTy = dyn_cast<PointerType>(Ty); 1493 if (isKernelFunc) { 1494 if (PTy) { 1495 // Special handling for pointer arguments to kernel 1496 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1497 1498 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 1499 NVPTX::CUDA) { 1500 Type *ETy = PTy->getElementType(); 1501 int addrSpace = PTy->getAddressSpace(); 1502 switch (addrSpace) { 1503 default: 1504 O << ".ptr "; 1505 break; 1506 case llvm::ADDRESS_SPACE_CONST: 1507 O << ".ptr .const "; 1508 break; 1509 case llvm::ADDRESS_SPACE_SHARED: 1510 O << ".ptr .shared "; 1511 break; 1512 case llvm::ADDRESS_SPACE_GLOBAL: 1513 O << ".ptr .global "; 1514 break; 1515 } 1516 O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; 1517 } 1518 printParamName(I, paramIndex, O); 1519 continue; 1520 } 1521 1522 // non-pointer scalar to kernel func 1523 O << "\t.param ."; 1524 // Special case: predicate operands become .u8 types 1525 if (Ty->isIntegerTy(1)) 1526 O << "u8"; 1527 else 1528 O << getPTXFundamentalTypeStr(Ty); 1529 O << " "; 1530 printParamName(I, paramIndex, O); 1531 continue; 1532 } 1533 // Non-kernel function, just print .param .b<size> for ABI 1534 // and .reg .b<size> for non-ABI 1535 unsigned sz = 0; 1536 if (isa<IntegerType>(Ty)) { 1537 sz = cast<IntegerType>(Ty)->getBitWidth(); 1538 if (sz < 32) 1539 sz = 32; 1540 } else if (isa<PointerType>(Ty)) 1541 sz = thePointerTy.getSizeInBits(); 1542 else 1543 sz = Ty->getPrimitiveSizeInBits(); 1544 if (isABI) 1545 O << "\t.param .b" << sz << " "; 1546 else 1547 O << "\t.reg .b" << sz << " "; 1548 printParamName(I, paramIndex, O); 1549 continue; 1550 } 1551 1552 // param has byVal attribute. So should be a pointer 1553 auto *PTy = dyn_cast<PointerType>(Ty); 1554 assert(PTy && "Param with byval attribute should be a pointer type"); 1555 Type *ETy = PTy->getElementType(); 1556 1557 if (isABI || isKernelFunc) { 1558 // Just print .param .align <a> .b8 .param[size]; 1559 // <a> = PAL.getparamalignment 1560 // size = typeallocsize of element type 1561 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1562 if (align == 0) 1563 align = DL.getABITypeAlignment(ETy); 1564 1565 unsigned sz = DL.getTypeAllocSize(ETy); 1566 O << "\t.param .align " << align << " .b8 "; 1567 printParamName(I, paramIndex, O); 1568 O << "[" << sz << "]"; 1569 continue; 1570 } else { 1571 // Split the ETy into constituent parts and 1572 // print .param .b<size> <name> for each part. 1573 // Further, if a part is vector, print the above for 1574 // each vector element. 1575 SmallVector<EVT, 16> vtparts; 1576 ComputeValueVTs(*TLI, DL, ETy, vtparts); 1577 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1578 unsigned elems = 1; 1579 EVT elemtype = vtparts[i]; 1580 if (vtparts[i].isVector()) { 1581 elems = vtparts[i].getVectorNumElements(); 1582 elemtype = vtparts[i].getVectorElementType(); 1583 } 1584 1585 for (unsigned j = 0, je = elems; j != je; ++j) { 1586 unsigned sz = elemtype.getSizeInBits(); 1587 if (elemtype.isInteger() && (sz < 32)) 1588 sz = 32; 1589 O << "\t.reg .b" << sz << " "; 1590 printParamName(I, paramIndex, O); 1591 if (j < je - 1) 1592 O << ",\n"; 1593 ++paramIndex; 1594 } 1595 if (i < e - 1) 1596 O << ",\n"; 1597 } 1598 --paramIndex; 1599 continue; 1600 } 1601 } 1602 1603 O << "\n)\n"; 1604 } 1605 1606 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1607 raw_ostream &O) { 1608 const Function *F = MF.getFunction(); 1609 emitFunctionParamList(F, O); 1610 } 1611 1612 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1613 const MachineFunction &MF) { 1614 SmallString<128> Str; 1615 raw_svector_ostream O(Str); 1616 1617 // Map the global virtual register number to a register class specific 1618 // virtual register number starting from 1 with that class. 1619 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1620 //unsigned numRegClasses = TRI->getNumRegClasses(); 1621 1622 // Emit the Fake Stack Object 1623 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1624 int NumBytes = (int) MFI->getStackSize(); 1625 if (NumBytes) { 1626 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1627 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1628 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 1629 O << "\t.reg .b64 \t%SP;\n"; 1630 O << "\t.reg .b64 \t%SPL;\n"; 1631 } else { 1632 O << "\t.reg .b32 \t%SP;\n"; 1633 O << "\t.reg .b32 \t%SPL;\n"; 1634 } 1635 } 1636 1637 // Go through all virtual registers to establish the mapping between the 1638 // global virtual 1639 // register number and the per class virtual register number. 1640 // We use the per class virtual register number in the ptx output. 1641 unsigned int numVRs = MRI->getNumVirtRegs(); 1642 for (unsigned i = 0; i < numVRs; i++) { 1643 unsigned int vr = TRI->index2VirtReg(i); 1644 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1645 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1646 int n = regmap.size(); 1647 regmap.insert(std::make_pair(vr, n + 1)); 1648 } 1649 1650 // Emit register declarations 1651 // @TODO: Extract out the real register usage 1652 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1653 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1654 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1655 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1656 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 1657 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1658 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 1659 1660 // Emit declaration of the virtual registers or 'physical' registers for 1661 // each register class 1662 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1663 const TargetRegisterClass *RC = TRI->getRegClass(i); 1664 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1665 std::string rcname = getNVPTXRegClassName(RC); 1666 std::string rcStr = getNVPTXRegClassStr(RC); 1667 int n = regmap.size(); 1668 1669 // Only declare those registers that may be used. 1670 if (n) { 1671 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1672 << ">;\n"; 1673 } 1674 } 1675 1676 OutStreamer->EmitRawText(O.str()); 1677 } 1678 1679 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1680 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1681 bool ignored; 1682 unsigned int numHex; 1683 const char *lead; 1684 1685 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1686 numHex = 8; 1687 lead = "0f"; 1688 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1689 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1690 numHex = 16; 1691 lead = "0d"; 1692 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1693 } else 1694 llvm_unreachable("unsupported fp type"); 1695 1696 APInt API = APF.bitcastToAPInt(); 1697 std::string hexstr(utohexstr(API.getZExtValue())); 1698 O << lead; 1699 if (hexstr.length() < numHex) 1700 O << std::string(numHex - hexstr.length(), '0'); 1701 O << utohexstr(API.getZExtValue()); 1702 } 1703 1704 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1705 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1706 O << CI->getValue(); 1707 return; 1708 } 1709 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1710 printFPConstant(CFP, O); 1711 return; 1712 } 1713 if (isa<ConstantPointerNull>(CPV)) { 1714 O << "0"; 1715 return; 1716 } 1717 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1718 PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); 1719 bool IsNonGenericPointer = false; 1720 if (PTy && PTy->getAddressSpace() != 0) { 1721 IsNonGenericPointer = true; 1722 } 1723 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 1724 O << "generic("; 1725 getSymbol(GVar)->print(O, MAI); 1726 O << ")"; 1727 } else { 1728 getSymbol(GVar)->print(O, MAI); 1729 } 1730 return; 1731 } 1732 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1733 const Value *v = Cexpr->stripPointerCasts(); 1734 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 1735 bool IsNonGenericPointer = false; 1736 if (PTy && PTy->getAddressSpace() != 0) { 1737 IsNonGenericPointer = true; 1738 } 1739 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1740 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 1741 O << "generic("; 1742 getSymbol(GVar)->print(O, MAI); 1743 O << ")"; 1744 } else { 1745 getSymbol(GVar)->print(O, MAI); 1746 } 1747 return; 1748 } else { 1749 lowerConstant(CPV)->print(O, MAI); 1750 return; 1751 } 1752 } 1753 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1754 } 1755 1756 // These utility functions assure we get the right sequence of bytes for a given 1757 // type even for big-endian machines 1758 template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { 1759 int64_t vp = (int64_t)val; 1760 for (unsigned i = 0; i < sizeof(T); ++i) { 1761 p[i] = (unsigned char)vp; 1762 vp >>= 8; 1763 } 1764 } 1765 static void ConvertFloatToBytes(unsigned char *p, float val) { 1766 int32_t *vp = (int32_t *)&val; 1767 for (unsigned i = 0; i < sizeof(int32_t); ++i) { 1768 p[i] = (unsigned char)*vp; 1769 *vp >>= 8; 1770 } 1771 } 1772 static void ConvertDoubleToBytes(unsigned char *p, double val) { 1773 int64_t *vp = (int64_t *)&val; 1774 for (unsigned i = 0; i < sizeof(int64_t); ++i) { 1775 p[i] = (unsigned char)*vp; 1776 *vp >>= 8; 1777 } 1778 } 1779 1780 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1781 AggBuffer *aggBuffer) { 1782 1783 const DataLayout &DL = getDataLayout(); 1784 1785 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1786 int s = DL.getTypeAllocSize(CPV->getType()); 1787 if (s < Bytes) 1788 s = Bytes; 1789 aggBuffer->addZeros(s); 1790 return; 1791 } 1792 1793 unsigned char ptr[8]; 1794 switch (CPV->getType()->getTypeID()) { 1795 1796 case Type::IntegerTyID: { 1797 Type *ETy = CPV->getType(); 1798 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1799 unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); 1800 ConvertIntToBytes<>(ptr, c); 1801 aggBuffer->addBytes(ptr, 1, Bytes); 1802 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1803 short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); 1804 ConvertIntToBytes<>(ptr, int16); 1805 aggBuffer->addBytes(ptr, 2, Bytes); 1806 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1807 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1808 int int32 = (int)(constInt->getZExtValue()); 1809 ConvertIntToBytes<>(ptr, int32); 1810 aggBuffer->addBytes(ptr, 4, Bytes); 1811 break; 1812 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1813 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1814 ConstantFoldConstantExpression(Cexpr, DL))) { 1815 int int32 = (int)(constInt->getZExtValue()); 1816 ConvertIntToBytes<>(ptr, int32); 1817 aggBuffer->addBytes(ptr, 4, Bytes); 1818 break; 1819 } 1820 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1821 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1822 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1823 aggBuffer->addZeros(4); 1824 break; 1825 } 1826 } 1827 llvm_unreachable("unsupported integer const type"); 1828 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1829 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1830 long long int64 = (long long)(constInt->getZExtValue()); 1831 ConvertIntToBytes<>(ptr, int64); 1832 aggBuffer->addBytes(ptr, 8, Bytes); 1833 break; 1834 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1835 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1836 ConstantFoldConstantExpression(Cexpr, DL))) { 1837 long long int64 = (long long)(constInt->getZExtValue()); 1838 ConvertIntToBytes<>(ptr, int64); 1839 aggBuffer->addBytes(ptr, 8, Bytes); 1840 break; 1841 } 1842 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1843 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1844 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1845 aggBuffer->addZeros(8); 1846 break; 1847 } 1848 } 1849 llvm_unreachable("unsupported integer const type"); 1850 } else 1851 llvm_unreachable("unsupported integer const type"); 1852 break; 1853 } 1854 case Type::FloatTyID: 1855 case Type::DoubleTyID: { 1856 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1857 Type *Ty = CFP->getType(); 1858 if (Ty == Type::getFloatTy(CPV->getContext())) { 1859 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1860 ConvertFloatToBytes(ptr, float32); 1861 aggBuffer->addBytes(ptr, 4, Bytes); 1862 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1863 double float64 = CFP->getValueAPF().convertToDouble(); 1864 ConvertDoubleToBytes(ptr, float64); 1865 aggBuffer->addBytes(ptr, 8, Bytes); 1866 } else { 1867 llvm_unreachable("unsupported fp const type"); 1868 } 1869 break; 1870 } 1871 case Type::PointerTyID: { 1872 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1873 aggBuffer->addSymbol(GVar, GVar); 1874 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1875 const Value *v = Cexpr->stripPointerCasts(); 1876 aggBuffer->addSymbol(v, Cexpr); 1877 } 1878 unsigned int s = DL.getTypeAllocSize(CPV->getType()); 1879 aggBuffer->addZeros(s); 1880 break; 1881 } 1882 1883 case Type::ArrayTyID: 1884 case Type::VectorTyID: 1885 case Type::StructTyID: { 1886 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1887 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 1888 int ElementSize = DL.getTypeAllocSize(CPV->getType()); 1889 bufferAggregateConstant(CPV, aggBuffer); 1890 if (Bytes > ElementSize) 1891 aggBuffer->addZeros(Bytes - ElementSize); 1892 } else if (isa<ConstantAggregateZero>(CPV)) 1893 aggBuffer->addZeros(Bytes); 1894 else 1895 llvm_unreachable("Unexpected Constant type"); 1896 break; 1897 } 1898 1899 default: 1900 llvm_unreachable("unsupported type"); 1901 } 1902 } 1903 1904 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1905 AggBuffer *aggBuffer) { 1906 const DataLayout &DL = getDataLayout(); 1907 int Bytes; 1908 1909 // Old constants 1910 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1911 if (CPV->getNumOperands()) 1912 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1913 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1914 return; 1915 } 1916 1917 if (const ConstantDataSequential *CDS = 1918 dyn_cast<ConstantDataSequential>(CPV)) { 1919 if (CDS->getNumElements()) 1920 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1921 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1922 aggBuffer); 1923 return; 1924 } 1925 1926 if (isa<ConstantStruct>(CPV)) { 1927 if (CPV->getNumOperands()) { 1928 StructType *ST = cast<StructType>(CPV->getType()); 1929 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1930 if (i == (e - 1)) 1931 Bytes = DL.getStructLayout(ST)->getElementOffset(0) + 1932 DL.getTypeAllocSize(ST) - 1933 DL.getStructLayout(ST)->getElementOffset(i); 1934 else 1935 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - 1936 DL.getStructLayout(ST)->getElementOffset(i); 1937 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1938 } 1939 } 1940 return; 1941 } 1942 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1943 } 1944 1945 // buildTypeNameMap - Run through symbol table looking for type names. 1946 // 1947 1948 1949 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1950 switch (MI.getOpcode()) { 1951 default: 1952 return false; 1953 case NVPTX::CallArgBeginInst: 1954 case NVPTX::CallArgEndInst0: 1955 case NVPTX::CallArgEndInst1: 1956 case NVPTX::CallArgF32: 1957 case NVPTX::CallArgF64: 1958 case NVPTX::CallArgI16: 1959 case NVPTX::CallArgI32: 1960 case NVPTX::CallArgI32imm: 1961 case NVPTX::CallArgI64: 1962 case NVPTX::CallArgParam: 1963 case NVPTX::CallVoidInst: 1964 case NVPTX::CallVoidInstReg: 1965 case NVPTX::Callseq_End: 1966 case NVPTX::CallVoidInstReg64: 1967 case NVPTX::DeclareParamInst: 1968 case NVPTX::DeclareRetMemInst: 1969 case NVPTX::DeclareRetRegInst: 1970 case NVPTX::DeclareRetScalarInst: 1971 case NVPTX::DeclareScalarParamInst: 1972 case NVPTX::DeclareScalarRegInst: 1973 case NVPTX::StoreParamF32: 1974 case NVPTX::StoreParamF64: 1975 case NVPTX::StoreParamI16: 1976 case NVPTX::StoreParamI32: 1977 case NVPTX::StoreParamI64: 1978 case NVPTX::StoreParamI8: 1979 case NVPTX::StoreRetvalF32: 1980 case NVPTX::StoreRetvalF64: 1981 case NVPTX::StoreRetvalI16: 1982 case NVPTX::StoreRetvalI32: 1983 case NVPTX::StoreRetvalI64: 1984 case NVPTX::StoreRetvalI8: 1985 case NVPTX::LastCallArgF32: 1986 case NVPTX::LastCallArgF64: 1987 case NVPTX::LastCallArgI16: 1988 case NVPTX::LastCallArgI32: 1989 case NVPTX::LastCallArgI32imm: 1990 case NVPTX::LastCallArgI64: 1991 case NVPTX::LastCallArgParam: 1992 case NVPTX::LoadParamMemF32: 1993 case NVPTX::LoadParamMemF64: 1994 case NVPTX::LoadParamMemI16: 1995 case NVPTX::LoadParamMemI32: 1996 case NVPTX::LoadParamMemI64: 1997 case NVPTX::LoadParamMemI8: 1998 case NVPTX::PrototypeInst: 1999 case NVPTX::DBG_VALUE: 2000 return true; 2001 } 2002 return false; 2003 } 2004 2005 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly 2006 /// a copy from AsmPrinter::lowerConstant, except customized to only handle 2007 /// expressions that are representable in PTX and create 2008 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 2009 const MCExpr * 2010 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 2011 MCContext &Ctx = OutContext; 2012 2013 if (CV->isNullValue() || isa<UndefValue>(CV)) 2014 return MCConstantExpr::create(0, Ctx); 2015 2016 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 2017 return MCConstantExpr::create(CI->getZExtValue(), Ctx); 2018 2019 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 2020 const MCSymbolRefExpr *Expr = 2021 MCSymbolRefExpr::create(getSymbol(GV), Ctx); 2022 if (ProcessingGeneric) { 2023 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 2024 } else { 2025 return Expr; 2026 } 2027 } 2028 2029 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 2030 if (!CE) { 2031 llvm_unreachable("Unknown constant value to lower!"); 2032 } 2033 2034 switch (CE->getOpcode()) { 2035 default: 2036 // If the code isn't optimized, there may be outstanding folding 2037 // opportunities. Attempt to fold the expression using DataLayout as a 2038 // last resort before giving up. 2039 if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) 2040 if (C != CE) 2041 return lowerConstantForGV(C, ProcessingGeneric); 2042 2043 // Otherwise report the problem to the user. 2044 { 2045 std::string S; 2046 raw_string_ostream OS(S); 2047 OS << "Unsupported expression in static initializer: "; 2048 CE->printAsOperand(OS, /*PrintType=*/false, 2049 !MF ? nullptr : MF->getFunction()->getParent()); 2050 report_fatal_error(OS.str()); 2051 } 2052 2053 case Instruction::AddrSpaceCast: { 2054 // Strip the addrspacecast and pass along the operand 2055 PointerType *DstTy = cast<PointerType>(CE->getType()); 2056 if (DstTy->getAddressSpace() == 0) { 2057 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 2058 } 2059 std::string S; 2060 raw_string_ostream OS(S); 2061 OS << "Unsupported expression in static initializer: "; 2062 CE->printAsOperand(OS, /*PrintType=*/ false, 2063 !MF ? 0 : MF->getFunction()->getParent()); 2064 report_fatal_error(OS.str()); 2065 } 2066 2067 case Instruction::GetElementPtr: { 2068 const DataLayout &DL = getDataLayout(); 2069 2070 // Generate a symbolic expression for the byte address 2071 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 2072 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 2073 2074 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 2075 ProcessingGeneric); 2076 if (!OffsetAI) 2077 return Base; 2078 2079 int64_t Offset = OffsetAI.getSExtValue(); 2080 return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 2081 Ctx); 2082 } 2083 2084 case Instruction::Trunc: 2085 // We emit the value and depend on the assembler to truncate the generated 2086 // expression properly. This is important for differences between 2087 // blockaddress labels. Since the two labels are in the same function, it 2088 // is reasonable to treat their delta as a 32-bit value. 2089 // FALL THROUGH. 2090 case Instruction::BitCast: 2091 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2092 2093 case Instruction::IntToPtr: { 2094 const DataLayout &DL = getDataLayout(); 2095 2096 // Handle casts to pointers by changing them into casts to the appropriate 2097 // integer type. This promotes constant folding and simplifies this code. 2098 Constant *Op = CE->getOperand(0); 2099 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), 2100 false/*ZExt*/); 2101 return lowerConstantForGV(Op, ProcessingGeneric); 2102 } 2103 2104 case Instruction::PtrToInt: { 2105 const DataLayout &DL = getDataLayout(); 2106 2107 // Support only foldable casts to/from pointers that can be eliminated by 2108 // changing the pointer to the appropriately sized integer type. 2109 Constant *Op = CE->getOperand(0); 2110 Type *Ty = CE->getType(); 2111 2112 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 2113 2114 // We can emit the pointer value into this slot if the slot is an 2115 // integer slot equal to the size of the pointer. 2116 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 2117 return OpExpr; 2118 2119 // Otherwise the pointer is smaller than the resultant integer, mask off 2120 // the high bits so we are sure to get a proper truncation if the input is 2121 // a constant expr. 2122 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 2123 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 2124 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 2125 } 2126 2127 // The MC library also has a right-shift operator, but it isn't consistently 2128 // signed or unsigned between different targets. 2129 case Instruction::Add: { 2130 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2131 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 2132 switch (CE->getOpcode()) { 2133 default: llvm_unreachable("Unknown binary operator constant cast expr"); 2134 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 2135 } 2136 } 2137 } 2138 } 2139 2140 // Copy of MCExpr::print customized for NVPTX 2141 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 2142 switch (Expr.getKind()) { 2143 case MCExpr::Target: 2144 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 2145 case MCExpr::Constant: 2146 OS << cast<MCConstantExpr>(Expr).getValue(); 2147 return; 2148 2149 case MCExpr::SymbolRef: { 2150 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 2151 const MCSymbol &Sym = SRE.getSymbol(); 2152 Sym.print(OS, MAI); 2153 return; 2154 } 2155 2156 case MCExpr::Unary: { 2157 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 2158 switch (UE.getOpcode()) { 2159 case MCUnaryExpr::LNot: OS << '!'; break; 2160 case MCUnaryExpr::Minus: OS << '-'; break; 2161 case MCUnaryExpr::Not: OS << '~'; break; 2162 case MCUnaryExpr::Plus: OS << '+'; break; 2163 } 2164 printMCExpr(*UE.getSubExpr(), OS); 2165 return; 2166 } 2167 2168 case MCExpr::Binary: { 2169 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 2170 2171 // Only print parens around the LHS if it is non-trivial. 2172 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 2173 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 2174 printMCExpr(*BE.getLHS(), OS); 2175 } else { 2176 OS << '('; 2177 printMCExpr(*BE.getLHS(), OS); 2178 OS<< ')'; 2179 } 2180 2181 switch (BE.getOpcode()) { 2182 case MCBinaryExpr::Add: 2183 // Print "X-42" instead of "X+-42". 2184 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 2185 if (RHSC->getValue() < 0) { 2186 OS << RHSC->getValue(); 2187 return; 2188 } 2189 } 2190 2191 OS << '+'; 2192 break; 2193 default: llvm_unreachable("Unhandled binary operator"); 2194 } 2195 2196 // Only print parens around the LHS if it is non-trivial. 2197 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 2198 printMCExpr(*BE.getRHS(), OS); 2199 } else { 2200 OS << '('; 2201 printMCExpr(*BE.getRHS(), OS); 2202 OS << ')'; 2203 } 2204 return; 2205 } 2206 } 2207 2208 llvm_unreachable("Invalid expression kind!"); 2209 } 2210 2211 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2212 /// 2213 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2214 unsigned AsmVariant, 2215 const char *ExtraCode, raw_ostream &O) { 2216 if (ExtraCode && ExtraCode[0]) { 2217 if (ExtraCode[1] != 0) 2218 return true; // Unknown modifier. 2219 2220 switch (ExtraCode[0]) { 2221 default: 2222 // See if this is a generic print operand 2223 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2224 case 'r': 2225 break; 2226 } 2227 } 2228 2229 printOperand(MI, OpNo, O); 2230 2231 return false; 2232 } 2233 2234 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2235 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2236 const char *ExtraCode, raw_ostream &O) { 2237 if (ExtraCode && ExtraCode[0]) 2238 return true; // Unknown modifier 2239 2240 O << '['; 2241 printMemOperand(MI, OpNo, O); 2242 O << ']'; 2243 2244 return false; 2245 } 2246 2247 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2248 raw_ostream &O, const char *Modifier) { 2249 const MachineOperand &MO = MI->getOperand(opNum); 2250 switch (MO.getType()) { 2251 case MachineOperand::MO_Register: 2252 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2253 if (MO.getReg() == NVPTX::VRDepot) 2254 O << DEPOTNAME << getFunctionNumber(); 2255 else 2256 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2257 } else { 2258 emitVirtualRegister(MO.getReg(), O); 2259 } 2260 return; 2261 2262 case MachineOperand::MO_Immediate: 2263 if (!Modifier) 2264 O << MO.getImm(); 2265 else if (strstr(Modifier, "vec") == Modifier) 2266 printVecModifiedImmediate(MO, Modifier, O); 2267 else 2268 llvm_unreachable( 2269 "Don't know how to handle modifier on immediate operand"); 2270 return; 2271 2272 case MachineOperand::MO_FPImmediate: 2273 printFPConstant(MO.getFPImm(), O); 2274 break; 2275 2276 case MachineOperand::MO_GlobalAddress: 2277 getSymbol(MO.getGlobal())->print(O, MAI); 2278 break; 2279 2280 case MachineOperand::MO_MachineBasicBlock: 2281 MO.getMBB()->getSymbol()->print(O, MAI); 2282 return; 2283 2284 default: 2285 llvm_unreachable("Operand type not supported."); 2286 } 2287 } 2288 2289 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2290 raw_ostream &O, const char *Modifier) { 2291 printOperand(MI, opNum, O); 2292 2293 if (Modifier && !strcmp(Modifier, "add")) { 2294 O << ", "; 2295 printOperand(MI, opNum + 1, O); 2296 } else { 2297 if (MI->getOperand(opNum + 1).isImm() && 2298 MI->getOperand(opNum + 1).getImm() == 0) 2299 return; // don't print ',0' or '+0' 2300 O << "+"; 2301 printOperand(MI, opNum + 1, O); 2302 } 2303 } 2304 2305 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2306 std::stringstream temp; 2307 LineReader *reader = this->getReader(filename); 2308 temp << "\n//"; 2309 temp << filename.str(); 2310 temp << ":"; 2311 temp << line; 2312 temp << " "; 2313 temp << reader->readLine(line); 2314 temp << "\n"; 2315 this->OutStreamer->EmitRawText(temp.str()); 2316 } 2317 2318 LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2319 if (!reader) { 2320 reader = new LineReader(filename); 2321 } 2322 2323 if (reader->fileName() != filename) { 2324 delete reader; 2325 reader = new LineReader(filename); 2326 } 2327 2328 return reader; 2329 } 2330 2331 std::string LineReader::readLine(unsigned lineNum) { 2332 if (lineNum < theCurLine) { 2333 theCurLine = 0; 2334 fstr.seekg(0, std::ios::beg); 2335 } 2336 while (theCurLine < lineNum) { 2337 fstr.getline(buff, 500); 2338 theCurLine++; 2339 } 2340 return buff; 2341 } 2342 2343 // Force static initialization. 2344 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2345 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2346 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2347 } 2348