1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a printer that converts from our internal representation 11 // of machine-dependent LLVM code to NVPTX assembly language. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "NVPTXAsmPrinter.h" 16 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 17 #include "NVPTX.h" 18 #include "NVPTXInstrInfo.h" 19 #include "NVPTXMCExpr.h" 20 #include "NVPTXRegisterInfo.h" 21 #include "NVPTXTargetMachine.h" 22 #include "NVPTXUtilities.h" 23 #include "cl_common_defines.h" 24 #include "llvm/ADT/StringExtras.h" 25 #include "llvm/Analysis/ConstantFolding.h" 26 #include "llvm/Assembly/Writer.h" 27 #include "llvm/CodeGen/Analysis.h" 28 #include "llvm/CodeGen/MachineFrameInfo.h" 29 #include "llvm/CodeGen/MachineModuleInfo.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/DebugInfo.h" 32 #include "llvm/IR/DerivedTypes.h" 33 #include "llvm/IR/Function.h" 34 #include "llvm/IR/GlobalVariable.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/Operator.h" 37 #include "llvm/MC/MCStreamer.h" 38 #include "llvm/MC/MCSymbol.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/FormattedStream.h" 42 #include "llvm/Support/Path.h" 43 #include "llvm/Support/TargetRegistry.h" 44 #include "llvm/Support/TimeValue.h" 45 #include "llvm/Target/Mangler.h" 46 #include "llvm/Target/TargetLoweringObjectFile.h" 47 #include <sstream> 48 using namespace llvm; 49 50 bool RegAllocNilUsed = true; 51 52 #define DEPOTNAME "__local_depot" 53 54 static cl::opt<bool> 55 EmitLineNumbers("nvptx-emit-line-numbers", 56 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 57 cl::init(true)); 58 59 namespace llvm { bool InterleaveSrcInPtx = false; } 60 61 static cl::opt<bool, true> 62 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, 63 cl::desc("NVPTX Specific: Emit source line in ptx file"), 64 cl::location(llvm::InterleaveSrcInPtx)); 65 66 namespace { 67 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 68 /// depends. 69 void DiscoverDependentGlobals(const Value *V, 70 DenseSet<const GlobalVariable *> &Globals) { 71 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 72 Globals.insert(GV); 73 else { 74 if (const User *U = dyn_cast<User>(V)) { 75 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 76 DiscoverDependentGlobals(U->getOperand(i), Globals); 77 } 78 } 79 } 80 } 81 82 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 83 /// instances to be emitted, but only after any dependents have been added 84 /// first. 85 void VisitGlobalVariableForEmission( 86 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 87 DenseSet<const GlobalVariable *> &Visited, 88 DenseSet<const GlobalVariable *> &Visiting) { 89 // Have we already visited this one? 90 if (Visited.count(GV)) 91 return; 92 93 // Do we have a circular dependency? 94 if (Visiting.count(GV)) 95 report_fatal_error("Circular dependency found in global variable set"); 96 97 // Start visiting this global 98 Visiting.insert(GV); 99 100 // Make sure we visit all dependents first 101 DenseSet<const GlobalVariable *> Others; 102 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 103 DiscoverDependentGlobals(GV->getOperand(i), Others); 104 105 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 106 E = Others.end(); 107 I != E; ++I) 108 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 109 110 // Now we can visit ourself 111 Order.push_back(GV); 112 Visited.insert(GV); 113 Visiting.erase(GV); 114 } 115 } 116 117 // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 118 // cannot just link to the existing version. 119 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 120 /// 121 using namespace nvptx; 122 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 123 MCContext &Ctx = AP.OutContext; 124 125 if (CV->isNullValue() || isa<UndefValue>(CV)) 126 return MCConstantExpr::Create(0, Ctx); 127 128 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 129 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 130 131 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 132 return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); 133 134 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 135 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 136 137 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 138 if (CE == 0) 139 llvm_unreachable("Unknown constant value to lower!"); 140 141 switch (CE->getOpcode()) { 142 default: 143 // If the code isn't optimized, there may be outstanding folding 144 // opportunities. Attempt to fold the expression using DataLayout as a 145 // last resort before giving up. 146 if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 147 if (C != CE) 148 return LowerConstant(C, AP); 149 150 // Otherwise report the problem to the user. 151 { 152 std::string S; 153 raw_string_ostream OS(S); 154 OS << "Unsupported expression in static initializer: "; 155 WriteAsOperand(OS, CE, /*PrintType=*/ false, 156 !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 157 report_fatal_error(OS.str()); 158 } 159 case Instruction::GetElementPtr: { 160 const DataLayout &TD = *AP.TM.getDataLayout(); 161 // Generate a symbolic expression for the byte address 162 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 163 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 164 165 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 166 if (!OffsetAI) 167 return Base; 168 169 int64_t Offset = OffsetAI.getSExtValue(); 170 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 171 Ctx); 172 } 173 174 case Instruction::Trunc: 175 // We emit the value and depend on the assembler to truncate the generated 176 // expression properly. This is important for differences between 177 // blockaddress labels. Since the two labels are in the same function, it 178 // is reasonable to treat their delta as a 32-bit value. 179 // FALL THROUGH. 180 case Instruction::BitCast: 181 return LowerConstant(CE->getOperand(0), AP); 182 183 case Instruction::IntToPtr: { 184 const DataLayout &TD = *AP.TM.getDataLayout(); 185 // Handle casts to pointers by changing them into casts to the appropriate 186 // integer type. This promotes constant folding and simplifies this code. 187 Constant *Op = CE->getOperand(0); 188 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 189 false /*ZExt*/); 190 return LowerConstant(Op, AP); 191 } 192 193 case Instruction::PtrToInt: { 194 const DataLayout &TD = *AP.TM.getDataLayout(); 195 // Support only foldable casts to/from pointers that can be eliminated by 196 // changing the pointer to the appropriately sized integer type. 197 Constant *Op = CE->getOperand(0); 198 Type *Ty = CE->getType(); 199 200 const MCExpr *OpExpr = LowerConstant(Op, AP); 201 202 // We can emit the pointer value into this slot if the slot is an 203 // integer slot equal to the size of the pointer. 204 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 205 return OpExpr; 206 207 // Otherwise the pointer is smaller than the resultant integer, mask off 208 // the high bits so we are sure to get a proper truncation if the input is 209 // a constant expr. 210 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 211 const MCExpr *MaskExpr = 212 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 213 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 214 } 215 216 // The MC library also has a right-shift operator, but it isn't consistently 217 // signed or unsigned between different targets. 218 case Instruction::Add: 219 case Instruction::Sub: 220 case Instruction::Mul: 221 case Instruction::SDiv: 222 case Instruction::SRem: 223 case Instruction::Shl: 224 case Instruction::And: 225 case Instruction::Or: 226 case Instruction::Xor: { 227 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 228 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 229 switch (CE->getOpcode()) { 230 default: 231 llvm_unreachable("Unknown binary operator constant cast expr"); 232 case Instruction::Add: 233 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 234 case Instruction::Sub: 235 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 236 case Instruction::Mul: 237 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 238 case Instruction::SDiv: 239 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 240 case Instruction::SRem: 241 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 242 case Instruction::Shl: 243 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 244 case Instruction::And: 245 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 246 case Instruction::Or: 247 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 248 case Instruction::Xor: 249 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 250 } 251 } 252 } 253 } 254 255 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 256 if (!EmitLineNumbers) 257 return; 258 if (ignoreLoc(MI)) 259 return; 260 261 DebugLoc curLoc = MI.getDebugLoc(); 262 263 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 264 return; 265 266 if (prevDebugLoc == curLoc) 267 return; 268 269 prevDebugLoc = curLoc; 270 271 if (curLoc.isUnknown()) 272 return; 273 274 const MachineFunction *MF = MI.getParent()->getParent(); 275 //const TargetMachine &TM = MF->getTarget(); 276 277 const LLVMContext &ctx = MF->getFunction()->getContext(); 278 DIScope Scope(curLoc.getScope(ctx)); 279 280 assert((!Scope || Scope.isScope()) && 281 "Scope of a DebugLoc should be null or a DIScope."); 282 if (!Scope) 283 return; 284 285 StringRef fileName(Scope.getFilename()); 286 StringRef dirName(Scope.getDirectory()); 287 SmallString<128> FullPathName = dirName; 288 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 289 sys::path::append(FullPathName, fileName); 290 fileName = FullPathName.str(); 291 } 292 293 if (filenameMap.find(fileName.str()) == filenameMap.end()) 294 return; 295 296 // Emit the line from the source file. 297 if (llvm::InterleaveSrcInPtx) 298 this->emitSrcInText(fileName.str(), curLoc.getLine()); 299 300 std::stringstream temp; 301 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 302 << " " << curLoc.getCol(); 303 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 304 } 305 306 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 307 SmallString<128> Str; 308 raw_svector_ostream OS(Str); 309 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 310 emitLineNumberAsDotLoc(*MI); 311 312 MCInst Inst; 313 lowerToMCInst(MI, Inst); 314 OutStreamer.EmitInstruction(Inst); 315 } 316 317 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 318 OutMI.setOpcode(MI->getOpcode()); 319 320 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 321 const MachineOperand &MO = MI->getOperand(i); 322 323 MCOperand MCOp; 324 if (lowerOperand(MO, MCOp)) 325 OutMI.addOperand(MCOp); 326 } 327 } 328 329 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 330 MCOperand &MCOp) { 331 switch (MO.getType()) { 332 default: llvm_unreachable("unknown operand type"); 333 case MachineOperand::MO_Register: 334 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 335 break; 336 case MachineOperand::MO_Immediate: 337 MCOp = MCOperand::CreateImm(MO.getImm()); 338 break; 339 case MachineOperand::MO_MachineBasicBlock: 340 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 341 MO.getMBB()->getSymbol(), OutContext)); 342 break; 343 case MachineOperand::MO_ExternalSymbol: 344 MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); 345 break; 346 case MachineOperand::MO_GlobalAddress: 347 MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); 348 break; 349 case MachineOperand::MO_FPImmediate: { 350 const ConstantFP *Cnt = MO.getFPImm(); 351 APFloat Val = Cnt->getValueAPF(); 352 353 switch (Cnt->getType()->getTypeID()) { 354 default: report_fatal_error("Unsupported FP type"); break; 355 case Type::FloatTyID: 356 MCOp = MCOperand::CreateExpr( 357 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 358 break; 359 case Type::DoubleTyID: 360 MCOp = MCOperand::CreateExpr( 361 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 362 break; 363 } 364 break; 365 } 366 } 367 return true; 368 } 369 370 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 371 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 372 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 373 374 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 375 unsigned RegNum = RegMap[Reg]; 376 377 // Encode the register class in the upper 4 bits 378 // Must be kept in sync with NVPTXInstPrinter::printRegName 379 unsigned Ret = 0; 380 if (RC == &NVPTX::Int1RegsRegClass) { 381 Ret = (1 << 28); 382 } else if (RC == &NVPTX::Int16RegsRegClass) { 383 Ret = (2 << 28); 384 } else if (RC == &NVPTX::Int32RegsRegClass) { 385 Ret = (3 << 28); 386 } else if (RC == &NVPTX::Int64RegsRegClass) { 387 Ret = (4 << 28); 388 } else if (RC == &NVPTX::Float32RegsRegClass) { 389 Ret = (5 << 28); 390 } else if (RC == &NVPTX::Float64RegsRegClass) { 391 Ret = (6 << 28); 392 } else { 393 report_fatal_error("Bad register class"); 394 } 395 396 // Insert the vreg number 397 Ret |= (RegNum & 0x0FFFFFFF); 398 return Ret; 399 } else { 400 // Some special-use registers are actually physical registers. 401 // Encode this as the register class ID of 0 and the real register ID. 402 return Reg & 0x0FFFFFFF; 403 } 404 } 405 406 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, 407 const MCSymbol *Symbol) { 408 const MCExpr *Expr; 409 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 410 OutContext); 411 return MCOperand::CreateExpr(Expr); 412 } 413 414 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 415 const DataLayout *TD = TM.getDataLayout(); 416 const TargetLowering *TLI = TM.getTargetLowering(); 417 418 Type *Ty = F->getReturnType(); 419 420 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 421 422 if (Ty->getTypeID() == Type::VoidTyID) 423 return; 424 425 O << " ("; 426 427 if (isABI) { 428 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 429 unsigned size = 0; 430 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 431 size = ITy->getBitWidth(); 432 if (size < 32) 433 size = 32; 434 } else { 435 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 436 size = Ty->getPrimitiveSizeInBits(); 437 } 438 439 O << ".param .b" << size << " func_retval0"; 440 } else if (isa<PointerType>(Ty)) { 441 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 442 << " func_retval0"; 443 } else { 444 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 445 SmallVector<EVT, 16> vtparts; 446 ComputeValueVTs(*TLI, Ty, vtparts); 447 unsigned totalsz = 0; 448 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 449 unsigned elems = 1; 450 EVT elemtype = vtparts[i]; 451 if (vtparts[i].isVector()) { 452 elems = vtparts[i].getVectorNumElements(); 453 elemtype = vtparts[i].getVectorElementType(); 454 } 455 for (unsigned j = 0, je = elems; j != je; ++j) { 456 unsigned sz = elemtype.getSizeInBits(); 457 if (elemtype.isInteger() && (sz < 8)) 458 sz = 8; 459 totalsz += sz / 8; 460 } 461 } 462 unsigned retAlignment = 0; 463 if (!llvm::getAlign(*F, 0, retAlignment)) 464 retAlignment = TD->getABITypeAlignment(Ty); 465 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 466 << "]"; 467 } else 468 assert(false && "Unknown return type"); 469 } 470 } else { 471 SmallVector<EVT, 16> vtparts; 472 ComputeValueVTs(*TLI, Ty, vtparts); 473 unsigned idx = 0; 474 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 475 unsigned elems = 1; 476 EVT elemtype = vtparts[i]; 477 if (vtparts[i].isVector()) { 478 elems = vtparts[i].getVectorNumElements(); 479 elemtype = vtparts[i].getVectorElementType(); 480 } 481 482 for (unsigned j = 0, je = elems; j != je; ++j) { 483 unsigned sz = elemtype.getSizeInBits(); 484 if (elemtype.isInteger() && (sz < 32)) 485 sz = 32; 486 O << ".reg .b" << sz << " func_retval" << idx; 487 if (j < je - 1) 488 O << ", "; 489 ++idx; 490 } 491 if (i < e - 1) 492 O << ", "; 493 } 494 } 495 O << ") "; 496 return; 497 } 498 499 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 500 raw_ostream &O) { 501 const Function *F = MF.getFunction(); 502 printReturnValStr(F, O); 503 } 504 505 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 506 SmallString<128> Str; 507 raw_svector_ostream O(Str); 508 509 if (!GlobalsEmitted) { 510 emitGlobals(*MF->getFunction()->getParent()); 511 GlobalsEmitted = true; 512 } 513 514 // Set up 515 MRI = &MF->getRegInfo(); 516 F = MF->getFunction(); 517 emitLinkageDirective(F, O); 518 if (llvm::isKernelFunction(*F)) 519 O << ".entry "; 520 else { 521 O << ".func "; 522 printReturnValStr(*MF, O); 523 } 524 525 O << *CurrentFnSym; 526 527 emitFunctionParamList(*MF, O); 528 529 if (llvm::isKernelFunction(*F)) 530 emitKernelFunctionDirectives(*F, O); 531 532 OutStreamer.EmitRawText(O.str()); 533 534 prevDebugLoc = DebugLoc(); 535 } 536 537 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 538 VRegMapping.clear(); 539 OutStreamer.EmitRawText(StringRef("{\n")); 540 setAndEmitFunctionVirtualRegisters(*MF); 541 542 SmallString<128> Str; 543 raw_svector_ostream O(Str); 544 emitDemotedVars(MF->getFunction(), O); 545 OutStreamer.EmitRawText(O.str()); 546 } 547 548 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 549 OutStreamer.EmitRawText(StringRef("}\n")); 550 VRegMapping.clear(); 551 } 552 553 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 554 raw_ostream &O) const { 555 // If the NVVM IR has some of reqntid* specified, then output 556 // the reqntid directive, and set the unspecified ones to 1. 557 // If none of reqntid* is specified, don't output reqntid directive. 558 unsigned reqntidx, reqntidy, reqntidz; 559 bool specified = false; 560 if (llvm::getReqNTIDx(F, reqntidx) == false) 561 reqntidx = 1; 562 else 563 specified = true; 564 if (llvm::getReqNTIDy(F, reqntidy) == false) 565 reqntidy = 1; 566 else 567 specified = true; 568 if (llvm::getReqNTIDz(F, reqntidz) == false) 569 reqntidz = 1; 570 else 571 specified = true; 572 573 if (specified) 574 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 575 << "\n"; 576 577 // If the NVVM IR has some of maxntid* specified, then output 578 // the maxntid directive, and set the unspecified ones to 1. 579 // If none of maxntid* is specified, don't output maxntid directive. 580 unsigned maxntidx, maxntidy, maxntidz; 581 specified = false; 582 if (llvm::getMaxNTIDx(F, maxntidx) == false) 583 maxntidx = 1; 584 else 585 specified = true; 586 if (llvm::getMaxNTIDy(F, maxntidy) == false) 587 maxntidy = 1; 588 else 589 specified = true; 590 if (llvm::getMaxNTIDz(F, maxntidz) == false) 591 maxntidz = 1; 592 else 593 specified = true; 594 595 if (specified) 596 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 597 << "\n"; 598 599 unsigned mincta; 600 if (llvm::getMinCTASm(F, mincta)) 601 O << ".minnctapersm " << mincta << "\n"; 602 } 603 604 void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 605 raw_ostream &O) { 606 const TargetRegisterClass *RC = MRI->getRegClass(vr); 607 608 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 609 unsigned mapped_vr = regmap[vr]; 610 611 if (!isVec) { 612 O << getNVPTXRegClassStr(RC) << mapped_vr; 613 return; 614 } 615 report_fatal_error("Bad register!"); 616 } 617 618 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 619 raw_ostream &O) { 620 getVirtualRegisterName(vr, isVec, O); 621 } 622 623 void NVPTXAsmPrinter::printVecModifiedImmediate( 624 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 625 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 626 int Imm = (int) MO.getImm(); 627 if (0 == strcmp(Modifier, "vecelem")) 628 O << "_" << vecelem[Imm]; 629 else if (0 == strcmp(Modifier, "vecv4comm1")) { 630 if ((Imm < 0) || (Imm > 3)) 631 O << "//"; 632 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 633 if ((Imm < 4) || (Imm > 7)) 634 O << "//"; 635 } else if (0 == strcmp(Modifier, "vecv4pos")) { 636 if (Imm < 0) 637 Imm = 0; 638 O << "_" << vecelem[Imm % 4]; 639 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 640 if ((Imm < 0) || (Imm > 1)) 641 O << "//"; 642 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 643 if ((Imm < 2) || (Imm > 3)) 644 O << "//"; 645 } else if (0 == strcmp(Modifier, "vecv2pos")) { 646 if (Imm < 0) 647 Imm = 0; 648 O << "_" << vecelem[Imm % 2]; 649 } else 650 llvm_unreachable("Unknown Modifier on immediate operand"); 651 } 652 653 654 655 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 656 657 emitLinkageDirective(F, O); 658 if (llvm::isKernelFunction(*F)) 659 O << ".entry "; 660 else 661 O << ".func "; 662 printReturnValStr(F, O); 663 O << *Mang->getSymbol(F) << "\n"; 664 emitFunctionParamList(F, O); 665 O << ";\n"; 666 } 667 668 static bool usedInGlobalVarDef(const Constant *C) { 669 if (!C) 670 return false; 671 672 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 673 if (GV->getName().str() == "llvm.used") 674 return false; 675 return true; 676 } 677 678 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 679 ui != ue; ++ui) { 680 const Constant *C = dyn_cast<Constant>(*ui); 681 if (usedInGlobalVarDef(C)) 682 return true; 683 } 684 return false; 685 } 686 687 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 688 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 689 if (othergv->getName().str() == "llvm.used") 690 return true; 691 } 692 693 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 694 if (instr->getParent() && instr->getParent()->getParent()) { 695 const Function *curFunc = instr->getParent()->getParent(); 696 if (oneFunc && (curFunc != oneFunc)) 697 return false; 698 oneFunc = curFunc; 699 return true; 700 } else 701 return false; 702 } 703 704 if (const MDNode *md = dyn_cast<MDNode>(U)) 705 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 706 (md->getName().str() == "llvm.dbg.sp"))) 707 return true; 708 709 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 710 ui != ue; ++ui) { 711 if (usedInOneFunc(*ui, oneFunc) == false) 712 return false; 713 } 714 return true; 715 } 716 717 /* Find out if a global variable can be demoted to local scope. 718 * Currently, this is valid for CUDA shared variables, which have local 719 * scope and global lifetime. So the conditions to check are : 720 * 1. Is the global variable in shared address space? 721 * 2. Does it have internal linkage? 722 * 3. Is the global variable referenced only in one function? 723 */ 724 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 725 if (gv->hasInternalLinkage() == false) 726 return false; 727 const PointerType *Pty = gv->getType(); 728 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 729 return false; 730 731 const Function *oneFunc = 0; 732 733 bool flag = usedInOneFunc(gv, oneFunc); 734 if (flag == false) 735 return false; 736 if (!oneFunc) 737 return false; 738 f = oneFunc; 739 return true; 740 } 741 742 static bool useFuncSeen(const Constant *C, 743 llvm::DenseMap<const Function *, bool> &seenMap) { 744 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 745 ui != ue; ++ui) { 746 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 747 if (useFuncSeen(cu, seenMap)) 748 return true; 749 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 750 const BasicBlock *bb = I->getParent(); 751 if (!bb) 752 continue; 753 const Function *caller = bb->getParent(); 754 if (!caller) 755 continue; 756 if (seenMap.find(caller) != seenMap.end()) 757 return true; 758 } 759 } 760 return false; 761 } 762 763 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 764 llvm::DenseMap<const Function *, bool> seenMap; 765 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 766 const Function *F = FI; 767 768 if (F->isDeclaration()) { 769 if (F->use_empty()) 770 continue; 771 if (F->getIntrinsicID()) 772 continue; 773 emitDeclaration(F, O); 774 continue; 775 } 776 for (Value::const_use_iterator iter = F->use_begin(), 777 iterEnd = F->use_end(); 778 iter != iterEnd; ++iter) { 779 if (const Constant *C = dyn_cast<Constant>(*iter)) { 780 if (usedInGlobalVarDef(C)) { 781 // The use is in the initialization of a global variable 782 // that is a function pointer, so print a declaration 783 // for the original function 784 emitDeclaration(F, O); 785 break; 786 } 787 // Emit a declaration of this function if the function that 788 // uses this constant expr has already been seen. 789 if (useFuncSeen(C, seenMap)) { 790 emitDeclaration(F, O); 791 break; 792 } 793 } 794 795 if (!isa<Instruction>(*iter)) 796 continue; 797 const Instruction *instr = cast<Instruction>(*iter); 798 const BasicBlock *bb = instr->getParent(); 799 if (!bb) 800 continue; 801 const Function *caller = bb->getParent(); 802 if (!caller) 803 continue; 804 805 // If a caller has already been seen, then the caller is 806 // appearing in the module before the callee. so print out 807 // a declaration for the callee. 808 if (seenMap.find(caller) != seenMap.end()) { 809 emitDeclaration(F, O); 810 break; 811 } 812 } 813 seenMap[F] = true; 814 } 815 } 816 817 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 818 DebugInfoFinder DbgFinder; 819 DbgFinder.processModule(M); 820 821 unsigned i = 1; 822 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 823 E = DbgFinder.compile_unit_end(); 824 I != E; ++I) { 825 DICompileUnit DIUnit(*I); 826 StringRef Filename(DIUnit.getFilename()); 827 StringRef Dirname(DIUnit.getDirectory()); 828 SmallString<128> FullPathName = Dirname; 829 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 830 sys::path::append(FullPathName, Filename); 831 Filename = FullPathName.str(); 832 } 833 if (filenameMap.find(Filename.str()) != filenameMap.end()) 834 continue; 835 filenameMap[Filename.str()] = i; 836 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 837 ++i; 838 } 839 840 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 841 E = DbgFinder.subprogram_end(); 842 I != E; ++I) { 843 DISubprogram SP(*I); 844 StringRef Filename(SP.getFilename()); 845 StringRef Dirname(SP.getDirectory()); 846 SmallString<128> FullPathName = Dirname; 847 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 848 sys::path::append(FullPathName, Filename); 849 Filename = FullPathName.str(); 850 } 851 if (filenameMap.find(Filename.str()) != filenameMap.end()) 852 continue; 853 filenameMap[Filename.str()] = i; 854 ++i; 855 } 856 } 857 858 bool NVPTXAsmPrinter::doInitialization(Module &M) { 859 860 SmallString<128> Str1; 861 raw_svector_ostream OS1(Str1); 862 863 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 864 MMI->AnalyzeModule(M); 865 866 // We need to call the parent's one explicitly. 867 //bool Result = AsmPrinter::doInitialization(M); 868 869 // Initialize TargetLoweringObjectFile. 870 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 871 .Initialize(OutContext, TM); 872 873 Mang = new Mangler(OutContext, &TM); 874 875 // Emit header before any dwarf directives are emitted below. 876 emitHeader(M, OS1); 877 OutStreamer.EmitRawText(OS1.str()); 878 879 // Already commented out 880 //bool Result = AsmPrinter::doInitialization(M); 881 882 // Emit module-level inline asm if it exists. 883 if (!M.getModuleInlineAsm().empty()) { 884 OutStreamer.AddComment("Start of file scope inline assembly"); 885 OutStreamer.AddBlankLine(); 886 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 887 OutStreamer.AddBlankLine(); 888 OutStreamer.AddComment("End of file scope inline assembly"); 889 OutStreamer.AddBlankLine(); 890 } 891 892 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 893 recordAndEmitFilenames(M); 894 895 GlobalsEmitted = false; 896 897 return false; // success 898 } 899 900 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 901 SmallString<128> Str2; 902 raw_svector_ostream OS2(Str2); 903 904 emitDeclarations(M, OS2); 905 906 // As ptxas does not support forward references of globals, we need to first 907 // sort the list of module-level globals in def-use order. We visit each 908 // global variable in order, and ensure that we emit it *after* its dependent 909 // globals. We use a little extra memory maintaining both a set and a list to 910 // have fast searches while maintaining a strict ordering. 911 SmallVector<const GlobalVariable *, 8> Globals; 912 DenseSet<const GlobalVariable *> GVVisited; 913 DenseSet<const GlobalVariable *> GVVisiting; 914 915 // Visit each global variable, in order 916 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 917 I != E; ++I) 918 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 919 920 assert(GVVisited.size() == M.getGlobalList().size() && 921 "Missed a global variable"); 922 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 923 924 // Print out module-level global variables in proper order 925 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 926 printModuleLevelGV(Globals[i], OS2); 927 928 OS2 << '\n'; 929 930 OutStreamer.EmitRawText(OS2.str()); 931 } 932 933 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 934 O << "//\n"; 935 O << "// Generated by LLVM NVPTX Back-End\n"; 936 O << "//\n"; 937 O << "\n"; 938 939 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 940 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 941 942 O << ".target "; 943 O << nvptxSubtarget.getTargetName(); 944 945 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 946 O << ", texmode_independent"; 947 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 948 if (!nvptxSubtarget.hasDouble()) 949 O << ", map_f64_to_f32"; 950 } 951 952 if (MAI->doesSupportDebugInformation()) 953 O << ", debug"; 954 955 O << "\n"; 956 957 O << ".address_size "; 958 if (nvptxSubtarget.is64Bit()) 959 O << "64"; 960 else 961 O << "32"; 962 O << "\n"; 963 964 O << "\n"; 965 } 966 967 bool NVPTXAsmPrinter::doFinalization(Module &M) { 968 969 // If we did not emit any functions, then the global declarations have not 970 // yet been emitted. 971 if (!GlobalsEmitted) { 972 emitGlobals(M); 973 GlobalsEmitted = true; 974 } 975 976 // XXX Temproarily remove global variables so that doFinalization() will not 977 // emit them again (global variables are emitted at beginning). 978 979 Module::GlobalListType &global_list = M.getGlobalList(); 980 int i, n = global_list.size(); 981 GlobalVariable **gv_array = new GlobalVariable *[n]; 982 983 // first, back-up GlobalVariable in gv_array 984 i = 0; 985 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 986 I != E; ++I) 987 gv_array[i++] = &*I; 988 989 // second, empty global_list 990 while (!global_list.empty()) 991 global_list.remove(global_list.begin()); 992 993 // call doFinalization 994 bool ret = AsmPrinter::doFinalization(M); 995 996 // now we restore global variables 997 for (i = 0; i < n; i++) 998 global_list.insert(global_list.end(), gv_array[i]); 999 1000 delete[] gv_array; 1001 return ret; 1002 1003 //bool Result = AsmPrinter::doFinalization(M); 1004 // Instead of calling the parents doFinalization, we may 1005 // clone parents doFinalization and customize here. 1006 // Currently, we if NVISA out the EmitGlobals() in 1007 // parent's doFinalization, which is too intrusive. 1008 // 1009 // Same for the doInitialization. 1010 //return Result; 1011 } 1012 1013 // This function emits appropriate linkage directives for 1014 // functions and global variables. 1015 // 1016 // extern function declaration -> .extern 1017 // extern function definition -> .visible 1018 // external global variable with init -> .visible 1019 // external without init -> .extern 1020 // appending -> not allowed, assert. 1021 1022 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1023 raw_ostream &O) { 1024 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1025 if (V->hasExternalLinkage()) { 1026 if (isa<GlobalVariable>(V)) { 1027 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1028 if (GVar) { 1029 if (GVar->hasInitializer()) 1030 O << ".visible "; 1031 else 1032 O << ".extern "; 1033 } 1034 } else if (V->isDeclaration()) 1035 O << ".extern "; 1036 else 1037 O << ".visible "; 1038 } else if (V->hasAppendingLinkage()) { 1039 std::string msg; 1040 msg.append("Error: "); 1041 msg.append("Symbol "); 1042 if (V->hasName()) 1043 msg.append(V->getName().str()); 1044 msg.append("has unsupported appending linkage type"); 1045 llvm_unreachable(msg.c_str()); 1046 } 1047 } 1048 } 1049 1050 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1051 raw_ostream &O, 1052 bool processDemoted) { 1053 1054 // Skip meta data 1055 if (GVar->hasSection()) { 1056 if (GVar->getSection() == "llvm.metadata") 1057 return; 1058 } 1059 1060 const DataLayout *TD = TM.getDataLayout(); 1061 1062 // GlobalVariables are always constant pointers themselves. 1063 const PointerType *PTy = GVar->getType(); 1064 Type *ETy = PTy->getElementType(); 1065 1066 if (GVar->hasExternalLinkage()) { 1067 if (GVar->hasInitializer()) 1068 O << ".visible "; 1069 else 1070 O << ".extern "; 1071 } 1072 1073 if (llvm::isTexture(*GVar)) { 1074 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1075 return; 1076 } 1077 1078 if (llvm::isSurface(*GVar)) { 1079 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1080 return; 1081 } 1082 1083 if (GVar->isDeclaration()) { 1084 // (extern) declarations, no definition or initializer 1085 // Currently the only known declaration is for an automatic __local 1086 // (.shared) promoted to global. 1087 emitPTXGlobalVariable(GVar, O); 1088 O << ";\n"; 1089 return; 1090 } 1091 1092 if (llvm::isSampler(*GVar)) { 1093 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1094 1095 const Constant *Initializer = NULL; 1096 if (GVar->hasInitializer()) 1097 Initializer = GVar->getInitializer(); 1098 const ConstantInt *CI = NULL; 1099 if (Initializer) 1100 CI = dyn_cast<ConstantInt>(Initializer); 1101 if (CI) { 1102 unsigned sample = CI->getZExtValue(); 1103 1104 O << " = { "; 1105 1106 for (int i = 0, 1107 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1108 i < 3; i++) { 1109 O << "addr_mode_" << i << " = "; 1110 switch (addr) { 1111 case 0: 1112 O << "wrap"; 1113 break; 1114 case 1: 1115 O << "clamp_to_border"; 1116 break; 1117 case 2: 1118 O << "clamp_to_edge"; 1119 break; 1120 case 3: 1121 O << "wrap"; 1122 break; 1123 case 4: 1124 O << "mirror"; 1125 break; 1126 } 1127 O << ", "; 1128 } 1129 O << "filter_mode = "; 1130 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1131 case 0: 1132 O << "nearest"; 1133 break; 1134 case 1: 1135 O << "linear"; 1136 break; 1137 case 2: 1138 assert(0 && "Anisotropic filtering is not supported"); 1139 default: 1140 O << "nearest"; 1141 break; 1142 } 1143 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1144 O << ", force_unnormalized_coords = 1"; 1145 } 1146 O << " }"; 1147 } 1148 1149 O << ";\n"; 1150 return; 1151 } 1152 1153 if (GVar->hasPrivateLinkage()) { 1154 1155 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1156 return; 1157 1158 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1159 if (!strncmp(GVar->getName().data(), "filename", 8)) 1160 return; 1161 if (GVar->use_empty()) 1162 return; 1163 } 1164 1165 const Function *demotedFunc = 0; 1166 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1167 O << "// " << GVar->getName().str() << " has been demoted\n"; 1168 if (localDecls.find(demotedFunc) != localDecls.end()) 1169 localDecls[demotedFunc].push_back(GVar); 1170 else { 1171 std::vector<const GlobalVariable *> temp; 1172 temp.push_back(GVar); 1173 localDecls[demotedFunc] = temp; 1174 } 1175 return; 1176 } 1177 1178 O << "."; 1179 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1180 if (GVar->getAlignment() == 0) 1181 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1182 else 1183 O << " .align " << GVar->getAlignment(); 1184 1185 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1186 O << " ."; 1187 // Special case: ABI requires that we use .u8 for predicates 1188 if (ETy->isIntegerTy(1)) 1189 O << "u8"; 1190 else 1191 O << getPTXFundamentalTypeStr(ETy, false); 1192 O << " "; 1193 O << *Mang->getSymbol(GVar); 1194 1195 // Ptx allows variable initilization only for constant and global state 1196 // spaces. 1197 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1198 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1199 GVar->hasInitializer()) { 1200 const Constant *Initializer = GVar->getInitializer(); 1201 if (!Initializer->isNullValue()) { 1202 O << " = "; 1203 printScalarConstant(Initializer, O); 1204 } 1205 } 1206 } else { 1207 unsigned int ElementSize = 0; 1208 1209 // Although PTX has direct support for struct type and array type and 1210 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1211 // targets that support these high level field accesses. Structs, arrays 1212 // and vectors are lowered into arrays of bytes. 1213 switch (ETy->getTypeID()) { 1214 case Type::StructTyID: 1215 case Type::ArrayTyID: 1216 case Type::VectorTyID: 1217 ElementSize = TD->getTypeStoreSize(ETy); 1218 // Ptx allows variable initilization only for constant and 1219 // global state spaces. 1220 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1221 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1222 GVar->hasInitializer()) { 1223 const Constant *Initializer = GVar->getInitializer(); 1224 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1225 AggBuffer aggBuffer(ElementSize, O, *this); 1226 bufferAggregateConstant(Initializer, &aggBuffer); 1227 if (aggBuffer.numSymbols) { 1228 if (nvptxSubtarget.is64Bit()) { 1229 O << " .u64 " << *Mang->getSymbol(GVar) << "["; 1230 O << ElementSize / 8; 1231 } else { 1232 O << " .u32 " << *Mang->getSymbol(GVar) << "["; 1233 O << ElementSize / 4; 1234 } 1235 O << "]"; 1236 } else { 1237 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 1238 O << ElementSize; 1239 O << "]"; 1240 } 1241 O << " = {"; 1242 aggBuffer.print(); 1243 O << "}"; 1244 } else { 1245 O << " .b8 " << *Mang->getSymbol(GVar); 1246 if (ElementSize) { 1247 O << "["; 1248 O << ElementSize; 1249 O << "]"; 1250 } 1251 } 1252 } else { 1253 O << " .b8 " << *Mang->getSymbol(GVar); 1254 if (ElementSize) { 1255 O << "["; 1256 O << ElementSize; 1257 O << "]"; 1258 } 1259 } 1260 break; 1261 default: 1262 assert(0 && "type not supported yet"); 1263 } 1264 1265 } 1266 O << ";\n"; 1267 } 1268 1269 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1270 if (localDecls.find(f) == localDecls.end()) 1271 return; 1272 1273 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1274 1275 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1276 O << "\t// demoted variable\n\t"; 1277 printModuleLevelGV(gvars[i], O, true); 1278 } 1279 } 1280 1281 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1282 raw_ostream &O) const { 1283 switch (AddressSpace) { 1284 case llvm::ADDRESS_SPACE_LOCAL: 1285 O << "local"; 1286 break; 1287 case llvm::ADDRESS_SPACE_GLOBAL: 1288 O << "global"; 1289 break; 1290 case llvm::ADDRESS_SPACE_CONST: 1291 O << "const"; 1292 break; 1293 case llvm::ADDRESS_SPACE_SHARED: 1294 O << "shared"; 1295 break; 1296 default: 1297 report_fatal_error("Bad address space found while emitting PTX"); 1298 break; 1299 } 1300 } 1301 1302 std::string 1303 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1304 switch (Ty->getTypeID()) { 1305 default: 1306 llvm_unreachable("unexpected type"); 1307 break; 1308 case Type::IntegerTyID: { 1309 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1310 if (NumBits == 1) 1311 return "pred"; 1312 else if (NumBits <= 64) { 1313 std::string name = "u"; 1314 return name + utostr(NumBits); 1315 } else { 1316 llvm_unreachable("Integer too large"); 1317 break; 1318 } 1319 break; 1320 } 1321 case Type::FloatTyID: 1322 return "f32"; 1323 case Type::DoubleTyID: 1324 return "f64"; 1325 case Type::PointerTyID: 1326 if (nvptxSubtarget.is64Bit()) 1327 if (useB4PTR) 1328 return "b64"; 1329 else 1330 return "u64"; 1331 else if (useB4PTR) 1332 return "b32"; 1333 else 1334 return "u32"; 1335 } 1336 llvm_unreachable("unexpected type"); 1337 return NULL; 1338 } 1339 1340 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1341 raw_ostream &O) { 1342 1343 const DataLayout *TD = TM.getDataLayout(); 1344 1345 // GlobalVariables are always constant pointers themselves. 1346 const PointerType *PTy = GVar->getType(); 1347 Type *ETy = PTy->getElementType(); 1348 1349 O << "."; 1350 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1351 if (GVar->getAlignment() == 0) 1352 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1353 else 1354 O << " .align " << GVar->getAlignment(); 1355 1356 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1357 O << " ."; 1358 O << getPTXFundamentalTypeStr(ETy); 1359 O << " "; 1360 O << *Mang->getSymbol(GVar); 1361 return; 1362 } 1363 1364 int64_t ElementSize = 0; 1365 1366 // Although PTX has direct support for struct type and array type and LLVM IR 1367 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1368 // support these high level field accesses. Structs and arrays are lowered 1369 // into arrays of bytes. 1370 switch (ETy->getTypeID()) { 1371 case Type::StructTyID: 1372 case Type::ArrayTyID: 1373 case Type::VectorTyID: 1374 ElementSize = TD->getTypeStoreSize(ETy); 1375 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 1376 if (ElementSize) { 1377 O << itostr(ElementSize); 1378 } 1379 O << "]"; 1380 break; 1381 default: 1382 assert(0 && "type not supported yet"); 1383 } 1384 return; 1385 } 1386 1387 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1388 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1389 return TD->getPrefTypeAlignment(Ty); 1390 1391 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1392 if (ATy) 1393 return getOpenCLAlignment(TD, ATy->getElementType()); 1394 1395 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1396 if (VTy) { 1397 Type *ETy = VTy->getElementType(); 1398 unsigned int numE = VTy->getNumElements(); 1399 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1400 if (numE == 3) 1401 return 4 * alignE; 1402 else 1403 return numE * alignE; 1404 } 1405 1406 const StructType *STy = dyn_cast<StructType>(Ty); 1407 if (STy) { 1408 unsigned int alignStruct = 1; 1409 // Go through each element of the struct and find the 1410 // largest alignment. 1411 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1412 Type *ETy = STy->getElementType(i); 1413 unsigned int align = getOpenCLAlignment(TD, ETy); 1414 if (align > alignStruct) 1415 alignStruct = align; 1416 } 1417 return alignStruct; 1418 } 1419 1420 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1421 if (FTy) 1422 return TD->getPointerPrefAlignment(); 1423 return TD->getPrefTypeAlignment(Ty); 1424 } 1425 1426 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1427 int paramIndex, raw_ostream &O) { 1428 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1429 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1430 O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex; 1431 else { 1432 std::string argName = I->getName(); 1433 const char *p = argName.c_str(); 1434 while (*p) { 1435 if (*p == '.') 1436 O << "_"; 1437 else 1438 O << *p; 1439 p++; 1440 } 1441 } 1442 } 1443 1444 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1445 Function::const_arg_iterator I, E; 1446 int i = 0; 1447 1448 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1449 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1450 O << *CurrentFnSym << "_param_" << paramIndex; 1451 return; 1452 } 1453 1454 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1455 if (i == paramIndex) { 1456 printParamName(I, paramIndex, O); 1457 return; 1458 } 1459 } 1460 llvm_unreachable("paramIndex out of bound"); 1461 } 1462 1463 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1464 const DataLayout *TD = TM.getDataLayout(); 1465 const AttributeSet &PAL = F->getAttributes(); 1466 const TargetLowering *TLI = TM.getTargetLowering(); 1467 Function::const_arg_iterator I, E; 1468 unsigned paramIndex = 0; 1469 bool first = true; 1470 bool isKernelFunc = llvm::isKernelFunction(*F); 1471 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1472 MVT thePointerTy = TLI->getPointerTy(); 1473 1474 O << "(\n"; 1475 1476 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1477 Type *Ty = I->getType(); 1478 1479 if (!first) 1480 O << ",\n"; 1481 1482 first = false; 1483 1484 // Handle image/sampler parameters 1485 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1486 if (llvm::isImage(*I)) { 1487 std::string sname = I->getName(); 1488 if (llvm::isImageWriteOnly(*I)) 1489 O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_" 1490 << paramIndex; 1491 else // Default image is read_only 1492 O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_" 1493 << paramIndex; 1494 } else // Should be llvm::isSampler(*I) 1495 O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_" 1496 << paramIndex; 1497 continue; 1498 } 1499 1500 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1501 if (Ty->isVectorTy()) { 1502 // Just print .param .b8 .align <a> .param[size]; 1503 // <a> = PAL.getparamalignment 1504 // size = typeallocsize of element type 1505 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1506 if (align == 0) 1507 align = TD->getABITypeAlignment(Ty); 1508 1509 unsigned sz = TD->getTypeAllocSize(Ty); 1510 O << "\t.param .align " << align << " .b8 "; 1511 printParamName(I, paramIndex, O); 1512 O << "[" << sz << "]"; 1513 1514 continue; 1515 } 1516 // Just a scalar 1517 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1518 if (isKernelFunc) { 1519 if (PTy) { 1520 // Special handling for pointer arguments to kernel 1521 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1522 1523 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1524 Type *ETy = PTy->getElementType(); 1525 int addrSpace = PTy->getAddressSpace(); 1526 switch (addrSpace) { 1527 default: 1528 O << ".ptr "; 1529 break; 1530 case llvm::ADDRESS_SPACE_CONST: 1531 O << ".ptr .const "; 1532 break; 1533 case llvm::ADDRESS_SPACE_SHARED: 1534 O << ".ptr .shared "; 1535 break; 1536 case llvm::ADDRESS_SPACE_GLOBAL: 1537 O << ".ptr .global "; 1538 break; 1539 } 1540 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1541 } 1542 printParamName(I, paramIndex, O); 1543 continue; 1544 } 1545 1546 // non-pointer scalar to kernel func 1547 O << "\t.param ."; 1548 // Special case: predicate operands become .u8 types 1549 if (Ty->isIntegerTy(1)) 1550 O << "u8"; 1551 else 1552 O << getPTXFundamentalTypeStr(Ty); 1553 O << " "; 1554 printParamName(I, paramIndex, O); 1555 continue; 1556 } 1557 // Non-kernel function, just print .param .b<size> for ABI 1558 // and .reg .b<size> for non ABY 1559 unsigned sz = 0; 1560 if (isa<IntegerType>(Ty)) { 1561 sz = cast<IntegerType>(Ty)->getBitWidth(); 1562 if (sz < 32) 1563 sz = 32; 1564 } else if (isa<PointerType>(Ty)) 1565 sz = thePointerTy.getSizeInBits(); 1566 else 1567 sz = Ty->getPrimitiveSizeInBits(); 1568 if (isABI) 1569 O << "\t.param .b" << sz << " "; 1570 else 1571 O << "\t.reg .b" << sz << " "; 1572 printParamName(I, paramIndex, O); 1573 continue; 1574 } 1575 1576 // param has byVal attribute. So should be a pointer 1577 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1578 assert(PTy && "Param with byval attribute should be a pointer type"); 1579 Type *ETy = PTy->getElementType(); 1580 1581 if (isABI || isKernelFunc) { 1582 // Just print .param .b8 .align <a> .param[size]; 1583 // <a> = PAL.getparamalignment 1584 // size = typeallocsize of element type 1585 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1586 if (align == 0) 1587 align = TD->getABITypeAlignment(ETy); 1588 1589 unsigned sz = TD->getTypeAllocSize(ETy); 1590 O << "\t.param .align " << align << " .b8 "; 1591 printParamName(I, paramIndex, O); 1592 O << "[" << sz << "]"; 1593 continue; 1594 } else { 1595 // Split the ETy into constituent parts and 1596 // print .param .b<size> <name> for each part. 1597 // Further, if a part is vector, print the above for 1598 // each vector element. 1599 SmallVector<EVT, 16> vtparts; 1600 ComputeValueVTs(*TLI, ETy, vtparts); 1601 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1602 unsigned elems = 1; 1603 EVT elemtype = vtparts[i]; 1604 if (vtparts[i].isVector()) { 1605 elems = vtparts[i].getVectorNumElements(); 1606 elemtype = vtparts[i].getVectorElementType(); 1607 } 1608 1609 for (unsigned j = 0, je = elems; j != je; ++j) { 1610 unsigned sz = elemtype.getSizeInBits(); 1611 if (elemtype.isInteger() && (sz < 32)) 1612 sz = 32; 1613 O << "\t.reg .b" << sz << " "; 1614 printParamName(I, paramIndex, O); 1615 if (j < je - 1) 1616 O << ",\n"; 1617 ++paramIndex; 1618 } 1619 if (i < e - 1) 1620 O << ",\n"; 1621 } 1622 --paramIndex; 1623 continue; 1624 } 1625 } 1626 1627 O << "\n)\n"; 1628 } 1629 1630 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1631 raw_ostream &O) { 1632 const Function *F = MF.getFunction(); 1633 emitFunctionParamList(F, O); 1634 } 1635 1636 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1637 const MachineFunction &MF) { 1638 SmallString<128> Str; 1639 raw_svector_ostream O(Str); 1640 1641 // Map the global virtual register number to a register class specific 1642 // virtual register number starting from 1 with that class. 1643 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1644 //unsigned numRegClasses = TRI->getNumRegClasses(); 1645 1646 // Emit the Fake Stack Object 1647 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1648 int NumBytes = (int) MFI->getStackSize(); 1649 if (NumBytes) { 1650 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1651 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1652 if (nvptxSubtarget.is64Bit()) { 1653 O << "\t.reg .b64 \t%SP;\n"; 1654 O << "\t.reg .b64 \t%SPL;\n"; 1655 } else { 1656 O << "\t.reg .b32 \t%SP;\n"; 1657 O << "\t.reg .b32 \t%SPL;\n"; 1658 } 1659 } 1660 1661 // Go through all virtual registers to establish the mapping between the 1662 // global virtual 1663 // register number and the per class virtual register number. 1664 // We use the per class virtual register number in the ptx output. 1665 unsigned int numVRs = MRI->getNumVirtRegs(); 1666 for (unsigned i = 0; i < numVRs; i++) { 1667 unsigned int vr = TRI->index2VirtReg(i); 1668 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1669 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1670 int n = regmap.size(); 1671 regmap.insert(std::make_pair(vr, n + 1)); 1672 } 1673 1674 // Emit register declarations 1675 // @TODO: Extract out the real register usage 1676 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1677 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1678 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1679 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1680 // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1681 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1682 // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1683 1684 // Emit declaration of the virtual registers or 'physical' registers for 1685 // each register class 1686 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1687 const TargetRegisterClass *RC = TRI->getRegClass(i); 1688 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1689 std::string rcname = getNVPTXRegClassName(RC); 1690 std::string rcStr = getNVPTXRegClassStr(RC); 1691 int n = regmap.size(); 1692 1693 // Only declare those registers that may be used. 1694 if (n) { 1695 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1696 << ">;\n"; 1697 } 1698 } 1699 1700 OutStreamer.EmitRawText(O.str()); 1701 } 1702 1703 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1704 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1705 bool ignored; 1706 unsigned int numHex; 1707 const char *lead; 1708 1709 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1710 numHex = 8; 1711 lead = "0f"; 1712 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1713 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1714 numHex = 16; 1715 lead = "0d"; 1716 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1717 } else 1718 llvm_unreachable("unsupported fp type"); 1719 1720 APInt API = APF.bitcastToAPInt(); 1721 std::string hexstr(utohexstr(API.getZExtValue())); 1722 O << lead; 1723 if (hexstr.length() < numHex) 1724 O << std::string(numHex - hexstr.length(), '0'); 1725 O << utohexstr(API.getZExtValue()); 1726 } 1727 1728 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1729 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1730 O << CI->getValue(); 1731 return; 1732 } 1733 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1734 printFPConstant(CFP, O); 1735 return; 1736 } 1737 if (isa<ConstantPointerNull>(CPV)) { 1738 O << "0"; 1739 return; 1740 } 1741 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1742 O << *Mang->getSymbol(GVar); 1743 return; 1744 } 1745 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1746 const Value *v = Cexpr->stripPointerCasts(); 1747 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1748 O << *Mang->getSymbol(GVar); 1749 return; 1750 } else { 1751 O << *LowerConstant(CPV, *this); 1752 return; 1753 } 1754 } 1755 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1756 } 1757 1758 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1759 AggBuffer *aggBuffer) { 1760 1761 const DataLayout *TD = TM.getDataLayout(); 1762 1763 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1764 int s = TD->getTypeAllocSize(CPV->getType()); 1765 if (s < Bytes) 1766 s = Bytes; 1767 aggBuffer->addZeros(s); 1768 return; 1769 } 1770 1771 unsigned char *ptr; 1772 switch (CPV->getType()->getTypeID()) { 1773 1774 case Type::IntegerTyID: { 1775 const Type *ETy = CPV->getType(); 1776 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1777 unsigned char c = 1778 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1779 ptr = &c; 1780 aggBuffer->addBytes(ptr, 1, Bytes); 1781 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1782 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1783 ptr = (unsigned char *)&int16; 1784 aggBuffer->addBytes(ptr, 2, Bytes); 1785 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1786 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1787 int int32 = (int)(constInt->getZExtValue()); 1788 ptr = (unsigned char *)&int32; 1789 aggBuffer->addBytes(ptr, 4, Bytes); 1790 break; 1791 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1792 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1793 ConstantFoldConstantExpression(Cexpr, TD))) { 1794 int int32 = (int)(constInt->getZExtValue()); 1795 ptr = (unsigned char *)&int32; 1796 aggBuffer->addBytes(ptr, 4, Bytes); 1797 break; 1798 } 1799 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1800 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1801 aggBuffer->addSymbol(v); 1802 aggBuffer->addZeros(4); 1803 break; 1804 } 1805 } 1806 llvm_unreachable("unsupported integer const type"); 1807 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1808 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1809 long long int64 = (long long)(constInt->getZExtValue()); 1810 ptr = (unsigned char *)&int64; 1811 aggBuffer->addBytes(ptr, 8, Bytes); 1812 break; 1813 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1814 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1815 ConstantFoldConstantExpression(Cexpr, TD))) { 1816 long long int64 = (long long)(constInt->getZExtValue()); 1817 ptr = (unsigned char *)&int64; 1818 aggBuffer->addBytes(ptr, 8, Bytes); 1819 break; 1820 } 1821 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1822 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1823 aggBuffer->addSymbol(v); 1824 aggBuffer->addZeros(8); 1825 break; 1826 } 1827 } 1828 llvm_unreachable("unsupported integer const type"); 1829 } else 1830 llvm_unreachable("unsupported integer const type"); 1831 break; 1832 } 1833 case Type::FloatTyID: 1834 case Type::DoubleTyID: { 1835 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1836 const Type *Ty = CFP->getType(); 1837 if (Ty == Type::getFloatTy(CPV->getContext())) { 1838 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1839 ptr = (unsigned char *)&float32; 1840 aggBuffer->addBytes(ptr, 4, Bytes); 1841 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1842 double float64 = CFP->getValueAPF().convertToDouble(); 1843 ptr = (unsigned char *)&float64; 1844 aggBuffer->addBytes(ptr, 8, Bytes); 1845 } else { 1846 llvm_unreachable("unsupported fp const type"); 1847 } 1848 break; 1849 } 1850 case Type::PointerTyID: { 1851 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1852 aggBuffer->addSymbol(GVar); 1853 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1854 const Value *v = Cexpr->stripPointerCasts(); 1855 aggBuffer->addSymbol(v); 1856 } 1857 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1858 aggBuffer->addZeros(s); 1859 break; 1860 } 1861 1862 case Type::ArrayTyID: 1863 case Type::VectorTyID: 1864 case Type::StructTyID: { 1865 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1866 isa<ConstantStruct>(CPV)) { 1867 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1868 bufferAggregateConstant(CPV, aggBuffer); 1869 if (Bytes > ElementSize) 1870 aggBuffer->addZeros(Bytes - ElementSize); 1871 } else if (isa<ConstantAggregateZero>(CPV)) 1872 aggBuffer->addZeros(Bytes); 1873 else 1874 llvm_unreachable("Unexpected Constant type"); 1875 break; 1876 } 1877 1878 default: 1879 llvm_unreachable("unsupported type"); 1880 } 1881 } 1882 1883 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1884 AggBuffer *aggBuffer) { 1885 const DataLayout *TD = TM.getDataLayout(); 1886 int Bytes; 1887 1888 // Old constants 1889 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1890 if (CPV->getNumOperands()) 1891 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1892 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1893 return; 1894 } 1895 1896 if (const ConstantDataSequential *CDS = 1897 dyn_cast<ConstantDataSequential>(CPV)) { 1898 if (CDS->getNumElements()) 1899 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1900 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1901 aggBuffer); 1902 return; 1903 } 1904 1905 if (isa<ConstantStruct>(CPV)) { 1906 if (CPV->getNumOperands()) { 1907 StructType *ST = cast<StructType>(CPV->getType()); 1908 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1909 if (i == (e - 1)) 1910 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1911 TD->getTypeAllocSize(ST) - 1912 TD->getStructLayout(ST)->getElementOffset(i); 1913 else 1914 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1915 TD->getStructLayout(ST)->getElementOffset(i); 1916 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1917 } 1918 } 1919 return; 1920 } 1921 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1922 } 1923 1924 // buildTypeNameMap - Run through symbol table looking for type names. 1925 // 1926 1927 bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1928 1929 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1930 1931 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1932 !PI->second.compare("struct._image2d_t") || 1933 !PI->second.compare("struct._image3d_t"))) 1934 return true; 1935 1936 return false; 1937 } 1938 1939 1940 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1941 switch (MI.getOpcode()) { 1942 default: 1943 return false; 1944 case NVPTX::CallArgBeginInst: 1945 case NVPTX::CallArgEndInst0: 1946 case NVPTX::CallArgEndInst1: 1947 case NVPTX::CallArgF32: 1948 case NVPTX::CallArgF64: 1949 case NVPTX::CallArgI16: 1950 case NVPTX::CallArgI32: 1951 case NVPTX::CallArgI32imm: 1952 case NVPTX::CallArgI64: 1953 case NVPTX::CallArgParam: 1954 case NVPTX::CallVoidInst: 1955 case NVPTX::CallVoidInstReg: 1956 case NVPTX::Callseq_End: 1957 case NVPTX::CallVoidInstReg64: 1958 case NVPTX::DeclareParamInst: 1959 case NVPTX::DeclareRetMemInst: 1960 case NVPTX::DeclareRetRegInst: 1961 case NVPTX::DeclareRetScalarInst: 1962 case NVPTX::DeclareScalarParamInst: 1963 case NVPTX::DeclareScalarRegInst: 1964 case NVPTX::StoreParamF32: 1965 case NVPTX::StoreParamF64: 1966 case NVPTX::StoreParamI16: 1967 case NVPTX::StoreParamI32: 1968 case NVPTX::StoreParamI64: 1969 case NVPTX::StoreParamI8: 1970 case NVPTX::StoreRetvalF32: 1971 case NVPTX::StoreRetvalF64: 1972 case NVPTX::StoreRetvalI16: 1973 case NVPTX::StoreRetvalI32: 1974 case NVPTX::StoreRetvalI64: 1975 case NVPTX::StoreRetvalI8: 1976 case NVPTX::LastCallArgF32: 1977 case NVPTX::LastCallArgF64: 1978 case NVPTX::LastCallArgI16: 1979 case NVPTX::LastCallArgI32: 1980 case NVPTX::LastCallArgI32imm: 1981 case NVPTX::LastCallArgI64: 1982 case NVPTX::LastCallArgParam: 1983 case NVPTX::LoadParamMemF32: 1984 case NVPTX::LoadParamMemF64: 1985 case NVPTX::LoadParamMemI16: 1986 case NVPTX::LoadParamMemI32: 1987 case NVPTX::LoadParamMemI64: 1988 case NVPTX::LoadParamMemI8: 1989 case NVPTX::PrototypeInst: 1990 case NVPTX::DBG_VALUE: 1991 return true; 1992 } 1993 return false; 1994 } 1995 1996 // Force static initialization. 1997 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 1998 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 1999 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2000 } 2001 2002 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2003 std::stringstream temp; 2004 LineReader *reader = this->getReader(filename.str()); 2005 temp << "\n//"; 2006 temp << filename.str(); 2007 temp << ":"; 2008 temp << line; 2009 temp << " "; 2010 temp << reader->readLine(line); 2011 temp << "\n"; 2012 this->OutStreamer.EmitRawText(Twine(temp.str())); 2013 } 2014 2015 LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2016 if (reader == NULL) { 2017 reader = new LineReader(filename); 2018 } 2019 2020 if (reader->fileName() != filename) { 2021 delete reader; 2022 reader = new LineReader(filename); 2023 } 2024 2025 return reader; 2026 } 2027 2028 std::string LineReader::readLine(unsigned lineNum) { 2029 if (lineNum < theCurLine) { 2030 theCurLine = 0; 2031 fstr.seekg(0, std::ios::beg); 2032 } 2033 while (theCurLine < lineNum) { 2034 fstr.getline(buff, 500); 2035 theCurLine++; 2036 } 2037 return buff; 2038 } 2039 2040 // Force static initialization. 2041 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2042 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2043 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2044 } 2045