1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains a printer that converts from our internal representation 11 // of machine-dependent LLVM code to NVPTX assembly language. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "NVPTXAsmPrinter.h" 16 #include "InstPrinter/NVPTXInstPrinter.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "NVPTX.h" 19 #include "NVPTXInstrInfo.h" 20 #include "NVPTXMachineFunctionInfo.h" 21 #include "NVPTXMCExpr.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXTargetMachine.h" 24 #include "NVPTXUtilities.h" 25 #include "cl_common_defines.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/Analysis/ConstantFolding.h" 28 #include "llvm/CodeGen/Analysis.h" 29 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineModuleInfo.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/IR/DebugInfo.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/Function.h" 35 #include "llvm/IR/GlobalVariable.h" 36 #include "llvm/IR/Mangler.h" 37 #include "llvm/IR/Module.h" 38 #include "llvm/IR/Operator.h" 39 #include "llvm/MC/MCStreamer.h" 40 #include "llvm/MC/MCSymbol.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/FormattedStream.h" 44 #include "llvm/Support/Path.h" 45 #include "llvm/Support/TargetRegistry.h" 46 #include "llvm/Support/TimeValue.h" 47 #include "llvm/Target/TargetLoweringObjectFile.h" 48 #include <sstream> 49 using namespace llvm; 50 51 #define DEPOTNAME "__local_depot" 52 53 static cl::opt<bool> 54 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 55 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 56 cl::init(true)); 57 58 static cl::opt<bool> 59 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 60 cl::desc("NVPTX Specific: Emit source line in ptx file"), 61 cl::init(false)); 62 63 namespace { 64 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 65 /// depends. 66 void DiscoverDependentGlobals(const Value *V, 67 DenseSet<const GlobalVariable *> &Globals) { 68 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 69 Globals.insert(GV); 70 else { 71 if (const User *U = dyn_cast<User>(V)) { 72 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 73 DiscoverDependentGlobals(U->getOperand(i), Globals); 74 } 75 } 76 } 77 } 78 79 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 80 /// instances to be emitted, but only after any dependents have been added 81 /// first. 82 void VisitGlobalVariableForEmission( 83 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 84 DenseSet<const GlobalVariable *> &Visited, 85 DenseSet<const GlobalVariable *> &Visiting) { 86 // Have we already visited this one? 87 if (Visited.count(GV)) 88 return; 89 90 // Do we have a circular dependency? 91 if (Visiting.count(GV)) 92 report_fatal_error("Circular dependency found in global variable set"); 93 94 // Start visiting this global 95 Visiting.insert(GV); 96 97 // Make sure we visit all dependents first 98 DenseSet<const GlobalVariable *> Others; 99 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100 DiscoverDependentGlobals(GV->getOperand(i), Others); 101 102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103 E = Others.end(); 104 I != E; ++I) 105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106 107 // Now we can visit ourself 108 Order.push_back(GV); 109 Visited.insert(GV); 110 Visiting.erase(GV); 111 } 112 } 113 114 // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 115 // cannot just link to the existing version. 116 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 117 /// 118 using namespace nvptx; 119 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 120 MCContext &Ctx = AP.OutContext; 121 122 if (CV->isNullValue() || isa<UndefValue>(CV)) 123 return MCConstantExpr::Create(0, Ctx); 124 125 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 126 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 127 128 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 129 return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); 130 131 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 132 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 133 134 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 135 if (!CE) 136 llvm_unreachable("Unknown constant value to lower!"); 137 138 switch (CE->getOpcode()) { 139 default: 140 // If the code isn't optimized, there may be outstanding folding 141 // opportunities. Attempt to fold the expression using DataLayout as a 142 // last resort before giving up. 143 if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 144 if (C != CE) 145 return LowerConstant(C, AP); 146 147 // Otherwise report the problem to the user. 148 { 149 std::string S; 150 raw_string_ostream OS(S); 151 OS << "Unsupported expression in static initializer: "; 152 CE->printAsOperand(OS, /*PrintType=*/ false, 153 !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); 154 report_fatal_error(OS.str()); 155 } 156 case Instruction::AddrSpaceCast: { 157 // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be 158 // handled by the generic() logic in the MCExpr printer 159 PointerType *DstTy = cast<PointerType>(CE->getType()); 160 PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType()); 161 if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) { 162 return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP); 163 } 164 std::string S; 165 raw_string_ostream OS(S); 166 OS << "Unsupported expression in static initializer: "; 167 CE->printAsOperand(OS, /*PrintType=*/ false, 168 !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); 169 report_fatal_error(OS.str()); 170 } 171 case Instruction::GetElementPtr: { 172 const DataLayout &TD = *AP.TM.getDataLayout(); 173 // Generate a symbolic expression for the byte address 174 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 175 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 176 177 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 178 if (!OffsetAI) 179 return Base; 180 181 int64_t Offset = OffsetAI.getSExtValue(); 182 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 183 Ctx); 184 } 185 186 case Instruction::Trunc: 187 // We emit the value and depend on the assembler to truncate the generated 188 // expression properly. This is important for differences between 189 // blockaddress labels. Since the two labels are in the same function, it 190 // is reasonable to treat their delta as a 32-bit value. 191 // FALL THROUGH. 192 case Instruction::BitCast: 193 return LowerConstant(CE->getOperand(0), AP); 194 195 case Instruction::IntToPtr: { 196 const DataLayout &TD = *AP.TM.getDataLayout(); 197 // Handle casts to pointers by changing them into casts to the appropriate 198 // integer type. This promotes constant folding and simplifies this code. 199 Constant *Op = CE->getOperand(0); 200 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 201 false /*ZExt*/); 202 return LowerConstant(Op, AP); 203 } 204 205 case Instruction::PtrToInt: { 206 const DataLayout &TD = *AP.TM.getDataLayout(); 207 // Support only foldable casts to/from pointers that can be eliminated by 208 // changing the pointer to the appropriately sized integer type. 209 Constant *Op = CE->getOperand(0); 210 Type *Ty = CE->getType(); 211 212 const MCExpr *OpExpr = LowerConstant(Op, AP); 213 214 // We can emit the pointer value into this slot if the slot is an 215 // integer slot equal to the size of the pointer. 216 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 217 return OpExpr; 218 219 // Otherwise the pointer is smaller than the resultant integer, mask off 220 // the high bits so we are sure to get a proper truncation if the input is 221 // a constant expr. 222 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 223 const MCExpr *MaskExpr = 224 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 225 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 226 } 227 228 // The MC library also has a right-shift operator, but it isn't consistently 229 // signed or unsigned between different targets. 230 case Instruction::Add: 231 case Instruction::Sub: 232 case Instruction::Mul: 233 case Instruction::SDiv: 234 case Instruction::SRem: 235 case Instruction::Shl: 236 case Instruction::And: 237 case Instruction::Or: 238 case Instruction::Xor: { 239 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 240 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 241 switch (CE->getOpcode()) { 242 default: 243 llvm_unreachable("Unknown binary operator constant cast expr"); 244 case Instruction::Add: 245 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 246 case Instruction::Sub: 247 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 248 case Instruction::Mul: 249 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 250 case Instruction::SDiv: 251 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 252 case Instruction::SRem: 253 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 254 case Instruction::Shl: 255 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 256 case Instruction::And: 257 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 258 case Instruction::Or: 259 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 260 case Instruction::Xor: 261 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 262 } 263 } 264 } 265 } 266 267 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 268 if (!EmitLineNumbers) 269 return; 270 if (ignoreLoc(MI)) 271 return; 272 273 DebugLoc curLoc = MI.getDebugLoc(); 274 275 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 276 return; 277 278 if (prevDebugLoc == curLoc) 279 return; 280 281 prevDebugLoc = curLoc; 282 283 if (curLoc.isUnknown()) 284 return; 285 286 const MachineFunction *MF = MI.getParent()->getParent(); 287 //const TargetMachine &TM = MF->getTarget(); 288 289 const LLVMContext &ctx = MF->getFunction()->getContext(); 290 DIScope Scope(curLoc.getScope(ctx)); 291 292 assert((!Scope || Scope.isScope()) && 293 "Scope of a DebugLoc should be null or a DIScope."); 294 if (!Scope) 295 return; 296 297 StringRef fileName(Scope.getFilename()); 298 StringRef dirName(Scope.getDirectory()); 299 SmallString<128> FullPathName = dirName; 300 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 301 sys::path::append(FullPathName, fileName); 302 fileName = FullPathName.str(); 303 } 304 305 if (filenameMap.find(fileName.str()) == filenameMap.end()) 306 return; 307 308 // Emit the line from the source file. 309 if (InterleaveSrc) 310 this->emitSrcInText(fileName.str(), curLoc.getLine()); 311 312 std::stringstream temp; 313 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 314 << " " << curLoc.getCol(); 315 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 316 } 317 318 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 319 SmallString<128> Str; 320 raw_svector_ostream OS(Str); 321 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 322 emitLineNumberAsDotLoc(*MI); 323 324 MCInst Inst; 325 lowerToMCInst(MI, Inst); 326 EmitToStreamer(OutStreamer, Inst); 327 } 328 329 // Handle symbol backtracking for targets that do not support image handles 330 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 331 unsigned OpNo, MCOperand &MCOp) { 332 const MachineOperand &MO = MI->getOperand(OpNo); 333 334 switch (MI->getOpcode()) { 335 default: return false; 336 case NVPTX::TEX_1D_F32_I32: 337 case NVPTX::TEX_1D_F32_F32: 338 case NVPTX::TEX_1D_F32_F32_LEVEL: 339 case NVPTX::TEX_1D_F32_F32_GRAD: 340 case NVPTX::TEX_1D_I32_I32: 341 case NVPTX::TEX_1D_I32_F32: 342 case NVPTX::TEX_1D_I32_F32_LEVEL: 343 case NVPTX::TEX_1D_I32_F32_GRAD: 344 case NVPTX::TEX_1D_ARRAY_F32_I32: 345 case NVPTX::TEX_1D_ARRAY_F32_F32: 346 case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: 347 case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: 348 case NVPTX::TEX_1D_ARRAY_I32_I32: 349 case NVPTX::TEX_1D_ARRAY_I32_F32: 350 case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: 351 case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: 352 case NVPTX::TEX_2D_F32_I32: 353 case NVPTX::TEX_2D_F32_F32: 354 case NVPTX::TEX_2D_F32_F32_LEVEL: 355 case NVPTX::TEX_2D_F32_F32_GRAD: 356 case NVPTX::TEX_2D_I32_I32: 357 case NVPTX::TEX_2D_I32_F32: 358 case NVPTX::TEX_2D_I32_F32_LEVEL: 359 case NVPTX::TEX_2D_I32_F32_GRAD: 360 case NVPTX::TEX_2D_ARRAY_F32_I32: 361 case NVPTX::TEX_2D_ARRAY_F32_F32: 362 case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: 363 case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: 364 case NVPTX::TEX_2D_ARRAY_I32_I32: 365 case NVPTX::TEX_2D_ARRAY_I32_F32: 366 case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: 367 case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: 368 case NVPTX::TEX_3D_F32_I32: 369 case NVPTX::TEX_3D_F32_F32: 370 case NVPTX::TEX_3D_F32_F32_LEVEL: 371 case NVPTX::TEX_3D_F32_F32_GRAD: 372 case NVPTX::TEX_3D_I32_I32: 373 case NVPTX::TEX_3D_I32_F32: 374 case NVPTX::TEX_3D_I32_F32_LEVEL: 375 case NVPTX::TEX_3D_I32_F32_GRAD: 376 { 377 // This is a texture fetch, so operand 4 is a texref and operand 5 is 378 // a samplerref 379 if (OpNo == 4) { 380 lowerImageHandleSymbol(MO.getImm(), MCOp); 381 return true; 382 } 383 if (OpNo == 5) { 384 lowerImageHandleSymbol(MO.getImm(), MCOp); 385 return true; 386 } 387 388 return false; 389 } 390 case NVPTX::SULD_1D_I8_TRAP: 391 case NVPTX::SULD_1D_I16_TRAP: 392 case NVPTX::SULD_1D_I32_TRAP: 393 case NVPTX::SULD_1D_ARRAY_I8_TRAP: 394 case NVPTX::SULD_1D_ARRAY_I16_TRAP: 395 case NVPTX::SULD_1D_ARRAY_I32_TRAP: 396 case NVPTX::SULD_2D_I8_TRAP: 397 case NVPTX::SULD_2D_I16_TRAP: 398 case NVPTX::SULD_2D_I32_TRAP: 399 case NVPTX::SULD_2D_ARRAY_I8_TRAP: 400 case NVPTX::SULD_2D_ARRAY_I16_TRAP: 401 case NVPTX::SULD_2D_ARRAY_I32_TRAP: 402 case NVPTX::SULD_3D_I8_TRAP: 403 case NVPTX::SULD_3D_I16_TRAP: 404 case NVPTX::SULD_3D_I32_TRAP: { 405 // This is a V1 surface load, so operand 1 is a surfref 406 if (OpNo == 1) { 407 lowerImageHandleSymbol(MO.getImm(), MCOp); 408 return true; 409 } 410 411 return false; 412 } 413 case NVPTX::SULD_1D_V2I8_TRAP: 414 case NVPTX::SULD_1D_V2I16_TRAP: 415 case NVPTX::SULD_1D_V2I32_TRAP: 416 case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: 417 case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: 418 case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: 419 case NVPTX::SULD_2D_V2I8_TRAP: 420 case NVPTX::SULD_2D_V2I16_TRAP: 421 case NVPTX::SULD_2D_V2I32_TRAP: 422 case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: 423 case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: 424 case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: 425 case NVPTX::SULD_3D_V2I8_TRAP: 426 case NVPTX::SULD_3D_V2I16_TRAP: 427 case NVPTX::SULD_3D_V2I32_TRAP: { 428 // This is a V2 surface load, so operand 2 is a surfref 429 if (OpNo == 2) { 430 lowerImageHandleSymbol(MO.getImm(), MCOp); 431 return true; 432 } 433 434 return false; 435 } 436 case NVPTX::SULD_1D_V4I8_TRAP: 437 case NVPTX::SULD_1D_V4I16_TRAP: 438 case NVPTX::SULD_1D_V4I32_TRAP: 439 case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: 440 case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: 441 case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: 442 case NVPTX::SULD_2D_V4I8_TRAP: 443 case NVPTX::SULD_2D_V4I16_TRAP: 444 case NVPTX::SULD_2D_V4I32_TRAP: 445 case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: 446 case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: 447 case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: 448 case NVPTX::SULD_3D_V4I8_TRAP: 449 case NVPTX::SULD_3D_V4I16_TRAP: 450 case NVPTX::SULD_3D_V4I32_TRAP: { 451 // This is a V4 surface load, so operand 4 is a surfref 452 if (OpNo == 4) { 453 lowerImageHandleSymbol(MO.getImm(), MCOp); 454 return true; 455 } 456 457 return false; 458 } 459 case NVPTX::SUST_B_1D_B8_TRAP: 460 case NVPTX::SUST_B_1D_B16_TRAP: 461 case NVPTX::SUST_B_1D_B32_TRAP: 462 case NVPTX::SUST_B_1D_V2B8_TRAP: 463 case NVPTX::SUST_B_1D_V2B16_TRAP: 464 case NVPTX::SUST_B_1D_V2B32_TRAP: 465 case NVPTX::SUST_B_1D_V4B8_TRAP: 466 case NVPTX::SUST_B_1D_V4B16_TRAP: 467 case NVPTX::SUST_B_1D_V4B32_TRAP: 468 case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: 469 case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: 470 case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: 471 case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: 472 case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: 473 case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: 474 case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: 475 case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: 476 case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: 477 case NVPTX::SUST_B_2D_B8_TRAP: 478 case NVPTX::SUST_B_2D_B16_TRAP: 479 case NVPTX::SUST_B_2D_B32_TRAP: 480 case NVPTX::SUST_B_2D_V2B8_TRAP: 481 case NVPTX::SUST_B_2D_V2B16_TRAP: 482 case NVPTX::SUST_B_2D_V2B32_TRAP: 483 case NVPTX::SUST_B_2D_V4B8_TRAP: 484 case NVPTX::SUST_B_2D_V4B16_TRAP: 485 case NVPTX::SUST_B_2D_V4B32_TRAP: 486 case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: 487 case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: 488 case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: 489 case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: 490 case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: 491 case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: 492 case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: 493 case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: 494 case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: 495 case NVPTX::SUST_B_3D_B8_TRAP: 496 case NVPTX::SUST_B_3D_B16_TRAP: 497 case NVPTX::SUST_B_3D_B32_TRAP: 498 case NVPTX::SUST_B_3D_V2B8_TRAP: 499 case NVPTX::SUST_B_3D_V2B16_TRAP: 500 case NVPTX::SUST_B_3D_V2B32_TRAP: 501 case NVPTX::SUST_B_3D_V4B8_TRAP: 502 case NVPTX::SUST_B_3D_V4B16_TRAP: 503 case NVPTX::SUST_B_3D_V4B32_TRAP: 504 case NVPTX::SUST_P_1D_B8_TRAP: 505 case NVPTX::SUST_P_1D_B16_TRAP: 506 case NVPTX::SUST_P_1D_B32_TRAP: 507 case NVPTX::SUST_P_1D_V2B8_TRAP: 508 case NVPTX::SUST_P_1D_V2B16_TRAP: 509 case NVPTX::SUST_P_1D_V2B32_TRAP: 510 case NVPTX::SUST_P_1D_V4B8_TRAP: 511 case NVPTX::SUST_P_1D_V4B16_TRAP: 512 case NVPTX::SUST_P_1D_V4B32_TRAP: 513 case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: 514 case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: 515 case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: 516 case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: 517 case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: 518 case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: 519 case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: 520 case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: 521 case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: 522 case NVPTX::SUST_P_2D_B8_TRAP: 523 case NVPTX::SUST_P_2D_B16_TRAP: 524 case NVPTX::SUST_P_2D_B32_TRAP: 525 case NVPTX::SUST_P_2D_V2B8_TRAP: 526 case NVPTX::SUST_P_2D_V2B16_TRAP: 527 case NVPTX::SUST_P_2D_V2B32_TRAP: 528 case NVPTX::SUST_P_2D_V4B8_TRAP: 529 case NVPTX::SUST_P_2D_V4B16_TRAP: 530 case NVPTX::SUST_P_2D_V4B32_TRAP: 531 case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: 532 case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: 533 case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: 534 case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: 535 case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: 536 case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: 537 case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: 538 case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: 539 case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: 540 case NVPTX::SUST_P_3D_B8_TRAP: 541 case NVPTX::SUST_P_3D_B16_TRAP: 542 case NVPTX::SUST_P_3D_B32_TRAP: 543 case NVPTX::SUST_P_3D_V2B8_TRAP: 544 case NVPTX::SUST_P_3D_V2B16_TRAP: 545 case NVPTX::SUST_P_3D_V2B32_TRAP: 546 case NVPTX::SUST_P_3D_V4B8_TRAP: 547 case NVPTX::SUST_P_3D_V4B16_TRAP: 548 case NVPTX::SUST_P_3D_V4B32_TRAP: { 549 // This is a surface store, so operand 0 is a surfref 550 if (OpNo == 0) { 551 lowerImageHandleSymbol(MO.getImm(), MCOp); 552 return true; 553 } 554 555 return false; 556 } 557 case NVPTX::TXQ_CHANNEL_ORDER: 558 case NVPTX::TXQ_CHANNEL_DATA_TYPE: 559 case NVPTX::TXQ_WIDTH: 560 case NVPTX::TXQ_HEIGHT: 561 case NVPTX::TXQ_DEPTH: 562 case NVPTX::TXQ_ARRAY_SIZE: 563 case NVPTX::TXQ_NUM_SAMPLES: 564 case NVPTX::TXQ_NUM_MIPMAP_LEVELS: 565 case NVPTX::SUQ_CHANNEL_ORDER: 566 case NVPTX::SUQ_CHANNEL_DATA_TYPE: 567 case NVPTX::SUQ_WIDTH: 568 case NVPTX::SUQ_HEIGHT: 569 case NVPTX::SUQ_DEPTH: 570 case NVPTX::SUQ_ARRAY_SIZE: { 571 // This is a query, so operand 1 is a surfref/texref 572 if (OpNo == 1) { 573 lowerImageHandleSymbol(MO.getImm(), MCOp); 574 return true; 575 } 576 577 return false; 578 } 579 } 580 } 581 582 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 583 // Ewwww 584 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 585 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 586 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 587 const char *Sym = MFI->getImageHandleSymbol(Index); 588 std::string *SymNamePtr = 589 nvTM.getManagedStrPool()->getManagedString(Sym); 590 MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( 591 StringRef(SymNamePtr->c_str()))); 592 } 593 594 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 595 OutMI.setOpcode(MI->getOpcode()); 596 const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>(); 597 598 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 599 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 600 const MachineOperand &MO = MI->getOperand(0); 601 OutMI.addOperand(GetSymbolRef( 602 OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); 603 return; 604 } 605 606 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 607 const MachineOperand &MO = MI->getOperand(i); 608 609 MCOperand MCOp; 610 if (!ST.hasImageHandles()) { 611 if (lowerImageHandleOperand(MI, i, MCOp)) { 612 OutMI.addOperand(MCOp); 613 continue; 614 } 615 } 616 617 if (lowerOperand(MO, MCOp)) 618 OutMI.addOperand(MCOp); 619 } 620 } 621 622 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 623 MCOperand &MCOp) { 624 switch (MO.getType()) { 625 default: llvm_unreachable("unknown operand type"); 626 case MachineOperand::MO_Register: 627 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 628 break; 629 case MachineOperand::MO_Immediate: 630 MCOp = MCOperand::CreateImm(MO.getImm()); 631 break; 632 case MachineOperand::MO_MachineBasicBlock: 633 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 634 MO.getMBB()->getSymbol(), OutContext)); 635 break; 636 case MachineOperand::MO_ExternalSymbol: 637 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 638 break; 639 case MachineOperand::MO_GlobalAddress: 640 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 641 break; 642 case MachineOperand::MO_FPImmediate: { 643 const ConstantFP *Cnt = MO.getFPImm(); 644 APFloat Val = Cnt->getValueAPF(); 645 646 switch (Cnt->getType()->getTypeID()) { 647 default: report_fatal_error("Unsupported FP type"); break; 648 case Type::FloatTyID: 649 MCOp = MCOperand::CreateExpr( 650 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 651 break; 652 case Type::DoubleTyID: 653 MCOp = MCOperand::CreateExpr( 654 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 655 break; 656 } 657 break; 658 } 659 } 660 return true; 661 } 662 663 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 664 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 665 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 666 667 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 668 unsigned RegNum = RegMap[Reg]; 669 670 // Encode the register class in the upper 4 bits 671 // Must be kept in sync with NVPTXInstPrinter::printRegName 672 unsigned Ret = 0; 673 if (RC == &NVPTX::Int1RegsRegClass) { 674 Ret = (1 << 28); 675 } else if (RC == &NVPTX::Int16RegsRegClass) { 676 Ret = (2 << 28); 677 } else if (RC == &NVPTX::Int32RegsRegClass) { 678 Ret = (3 << 28); 679 } else if (RC == &NVPTX::Int64RegsRegClass) { 680 Ret = (4 << 28); 681 } else if (RC == &NVPTX::Float32RegsRegClass) { 682 Ret = (5 << 28); 683 } else if (RC == &NVPTX::Float64RegsRegClass) { 684 Ret = (6 << 28); 685 } else { 686 report_fatal_error("Bad register class"); 687 } 688 689 // Insert the vreg number 690 Ret |= (RegNum & 0x0FFFFFFF); 691 return Ret; 692 } else { 693 // Some special-use registers are actually physical registers. 694 // Encode this as the register class ID of 0 and the real register ID. 695 return Reg & 0x0FFFFFFF; 696 } 697 } 698 699 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 700 const MCExpr *Expr; 701 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 702 OutContext); 703 return MCOperand::CreateExpr(Expr); 704 } 705 706 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 707 const DataLayout *TD = TM.getDataLayout(); 708 const TargetLowering *TLI = TM.getTargetLowering(); 709 710 Type *Ty = F->getReturnType(); 711 712 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 713 714 if (Ty->getTypeID() == Type::VoidTyID) 715 return; 716 717 O << " ("; 718 719 if (isABI) { 720 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 721 unsigned size = 0; 722 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 723 size = ITy->getBitWidth(); 724 if (size < 32) 725 size = 32; 726 } else { 727 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 728 size = Ty->getPrimitiveSizeInBits(); 729 } 730 731 O << ".param .b" << size << " func_retval0"; 732 } else if (isa<PointerType>(Ty)) { 733 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 734 << " func_retval0"; 735 } else { 736 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 737 unsigned totalsz = TD->getTypeAllocSize(Ty); 738 unsigned retAlignment = 0; 739 if (!llvm::getAlign(*F, 0, retAlignment)) 740 retAlignment = TD->getABITypeAlignment(Ty); 741 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 742 << "]"; 743 } else 744 assert(false && "Unknown return type"); 745 } 746 } else { 747 SmallVector<EVT, 16> vtparts; 748 ComputeValueVTs(*TLI, Ty, vtparts); 749 unsigned idx = 0; 750 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 751 unsigned elems = 1; 752 EVT elemtype = vtparts[i]; 753 if (vtparts[i].isVector()) { 754 elems = vtparts[i].getVectorNumElements(); 755 elemtype = vtparts[i].getVectorElementType(); 756 } 757 758 for (unsigned j = 0, je = elems; j != je; ++j) { 759 unsigned sz = elemtype.getSizeInBits(); 760 if (elemtype.isInteger() && (sz < 32)) 761 sz = 32; 762 O << ".reg .b" << sz << " func_retval" << idx; 763 if (j < je - 1) 764 O << ", "; 765 ++idx; 766 } 767 if (i < e - 1) 768 O << ", "; 769 } 770 } 771 O << ") "; 772 return; 773 } 774 775 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 776 raw_ostream &O) { 777 const Function *F = MF.getFunction(); 778 printReturnValStr(F, O); 779 } 780 781 void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 782 SmallString<128> Str; 783 raw_svector_ostream O(Str); 784 785 if (!GlobalsEmitted) { 786 emitGlobals(*MF->getFunction()->getParent()); 787 GlobalsEmitted = true; 788 } 789 790 // Set up 791 MRI = &MF->getRegInfo(); 792 F = MF->getFunction(); 793 emitLinkageDirective(F, O); 794 if (llvm::isKernelFunction(*F)) 795 O << ".entry "; 796 else { 797 O << ".func "; 798 printReturnValStr(*MF, O); 799 } 800 801 O << *CurrentFnSym; 802 803 emitFunctionParamList(*MF, O); 804 805 if (llvm::isKernelFunction(*F)) 806 emitKernelFunctionDirectives(*F, O); 807 808 OutStreamer.EmitRawText(O.str()); 809 810 prevDebugLoc = DebugLoc(); 811 } 812 813 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 814 VRegMapping.clear(); 815 OutStreamer.EmitRawText(StringRef("{\n")); 816 setAndEmitFunctionVirtualRegisters(*MF); 817 818 SmallString<128> Str; 819 raw_svector_ostream O(Str); 820 emitDemotedVars(MF->getFunction(), O); 821 OutStreamer.EmitRawText(O.str()); 822 } 823 824 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 825 OutStreamer.EmitRawText(StringRef("}\n")); 826 VRegMapping.clear(); 827 } 828 829 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 830 unsigned RegNo = MI->getOperand(0).getReg(); 831 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 832 if (TRI->isVirtualRegister(RegNo)) { 833 OutStreamer.AddComment(Twine("implicit-def: ") + 834 getVirtualRegisterName(RegNo)); 835 } else { 836 OutStreamer.AddComment(Twine("implicit-def: ") + 837 TM.getRegisterInfo()->getName(RegNo)); 838 } 839 OutStreamer.AddBlankLine(); 840 } 841 842 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 843 raw_ostream &O) const { 844 // If the NVVM IR has some of reqntid* specified, then output 845 // the reqntid directive, and set the unspecified ones to 1. 846 // If none of reqntid* is specified, don't output reqntid directive. 847 unsigned reqntidx, reqntidy, reqntidz; 848 bool specified = false; 849 if (llvm::getReqNTIDx(F, reqntidx) == false) 850 reqntidx = 1; 851 else 852 specified = true; 853 if (llvm::getReqNTIDy(F, reqntidy) == false) 854 reqntidy = 1; 855 else 856 specified = true; 857 if (llvm::getReqNTIDz(F, reqntidz) == false) 858 reqntidz = 1; 859 else 860 specified = true; 861 862 if (specified) 863 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 864 << "\n"; 865 866 // If the NVVM IR has some of maxntid* specified, then output 867 // the maxntid directive, and set the unspecified ones to 1. 868 // If none of maxntid* is specified, don't output maxntid directive. 869 unsigned maxntidx, maxntidy, maxntidz; 870 specified = false; 871 if (llvm::getMaxNTIDx(F, maxntidx) == false) 872 maxntidx = 1; 873 else 874 specified = true; 875 if (llvm::getMaxNTIDy(F, maxntidy) == false) 876 maxntidy = 1; 877 else 878 specified = true; 879 if (llvm::getMaxNTIDz(F, maxntidz) == false) 880 maxntidz = 1; 881 else 882 specified = true; 883 884 if (specified) 885 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 886 << "\n"; 887 888 unsigned mincta; 889 if (llvm::getMinCTASm(F, mincta)) 890 O << ".minnctapersm " << mincta << "\n"; 891 } 892 893 std::string 894 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 895 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 896 897 std::string Name; 898 raw_string_ostream NameStr(Name); 899 900 VRegRCMap::const_iterator I = VRegMapping.find(RC); 901 assert(I != VRegMapping.end() && "Bad register class"); 902 const DenseMap<unsigned, unsigned> &RegMap = I->second; 903 904 VRegMap::const_iterator VI = RegMap.find(Reg); 905 assert(VI != RegMap.end() && "Bad virtual register"); 906 unsigned MappedVR = VI->second; 907 908 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 909 910 NameStr.flush(); 911 return Name; 912 } 913 914 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 915 raw_ostream &O) { 916 O << getVirtualRegisterName(vr); 917 } 918 919 void NVPTXAsmPrinter::printVecModifiedImmediate( 920 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 921 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 922 int Imm = (int) MO.getImm(); 923 if (0 == strcmp(Modifier, "vecelem")) 924 O << "_" << vecelem[Imm]; 925 else if (0 == strcmp(Modifier, "vecv4comm1")) { 926 if ((Imm < 0) || (Imm > 3)) 927 O << "//"; 928 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 929 if ((Imm < 4) || (Imm > 7)) 930 O << "//"; 931 } else if (0 == strcmp(Modifier, "vecv4pos")) { 932 if (Imm < 0) 933 Imm = 0; 934 O << "_" << vecelem[Imm % 4]; 935 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 936 if ((Imm < 0) || (Imm > 1)) 937 O << "//"; 938 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 939 if ((Imm < 2) || (Imm > 3)) 940 O << "//"; 941 } else if (0 == strcmp(Modifier, "vecv2pos")) { 942 if (Imm < 0) 943 Imm = 0; 944 O << "_" << vecelem[Imm % 2]; 945 } else 946 llvm_unreachable("Unknown Modifier on immediate operand"); 947 } 948 949 950 951 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 952 953 emitLinkageDirective(F, O); 954 if (llvm::isKernelFunction(*F)) 955 O << ".entry "; 956 else 957 O << ".func "; 958 printReturnValStr(F, O); 959 O << *getSymbol(F) << "\n"; 960 emitFunctionParamList(F, O); 961 O << ";\n"; 962 } 963 964 static bool usedInGlobalVarDef(const Constant *C) { 965 if (!C) 966 return false; 967 968 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 969 if (GV->getName().str() == "llvm.used") 970 return false; 971 return true; 972 } 973 974 for (const User *U : C->users()) 975 if (const Constant *C = dyn_cast<Constant>(U)) 976 if (usedInGlobalVarDef(C)) 977 return true; 978 979 return false; 980 } 981 982 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 983 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 984 if (othergv->getName().str() == "llvm.used") 985 return true; 986 } 987 988 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 989 if (instr->getParent() && instr->getParent()->getParent()) { 990 const Function *curFunc = instr->getParent()->getParent(); 991 if (oneFunc && (curFunc != oneFunc)) 992 return false; 993 oneFunc = curFunc; 994 return true; 995 } else 996 return false; 997 } 998 999 if (const MDNode *md = dyn_cast<MDNode>(U)) 1000 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 1001 (md->getName().str() == "llvm.dbg.sp"))) 1002 return true; 1003 1004 for (const User *UU : U->users()) 1005 if (usedInOneFunc(UU, oneFunc) == false) 1006 return false; 1007 1008 return true; 1009 } 1010 1011 /* Find out if a global variable can be demoted to local scope. 1012 * Currently, this is valid for CUDA shared variables, which have local 1013 * scope and global lifetime. So the conditions to check are : 1014 * 1. Is the global variable in shared address space? 1015 * 2. Does it have internal linkage? 1016 * 3. Is the global variable referenced only in one function? 1017 */ 1018 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 1019 if (gv->hasInternalLinkage() == false) 1020 return false; 1021 const PointerType *Pty = gv->getType(); 1022 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 1023 return false; 1024 1025 const Function *oneFunc = nullptr; 1026 1027 bool flag = usedInOneFunc(gv, oneFunc); 1028 if (flag == false) 1029 return false; 1030 if (!oneFunc) 1031 return false; 1032 f = oneFunc; 1033 return true; 1034 } 1035 1036 static bool useFuncSeen(const Constant *C, 1037 llvm::DenseMap<const Function *, bool> &seenMap) { 1038 for (const User *U : C->users()) { 1039 if (const Constant *cu = dyn_cast<Constant>(U)) { 1040 if (useFuncSeen(cu, seenMap)) 1041 return true; 1042 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 1043 const BasicBlock *bb = I->getParent(); 1044 if (!bb) 1045 continue; 1046 const Function *caller = bb->getParent(); 1047 if (!caller) 1048 continue; 1049 if (seenMap.find(caller) != seenMap.end()) 1050 return true; 1051 } 1052 } 1053 return false; 1054 } 1055 1056 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 1057 llvm::DenseMap<const Function *, bool> seenMap; 1058 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 1059 const Function *F = FI; 1060 1061 if (F->isDeclaration()) { 1062 if (F->use_empty()) 1063 continue; 1064 if (F->getIntrinsicID()) 1065 continue; 1066 emitDeclaration(F, O); 1067 continue; 1068 } 1069 for (const User *U : F->users()) { 1070 if (const Constant *C = dyn_cast<Constant>(U)) { 1071 if (usedInGlobalVarDef(C)) { 1072 // The use is in the initialization of a global variable 1073 // that is a function pointer, so print a declaration 1074 // for the original function 1075 emitDeclaration(F, O); 1076 break; 1077 } 1078 // Emit a declaration of this function if the function that 1079 // uses this constant expr has already been seen. 1080 if (useFuncSeen(C, seenMap)) { 1081 emitDeclaration(F, O); 1082 break; 1083 } 1084 } 1085 1086 if (!isa<Instruction>(U)) 1087 continue; 1088 const Instruction *instr = cast<Instruction>(U); 1089 const BasicBlock *bb = instr->getParent(); 1090 if (!bb) 1091 continue; 1092 const Function *caller = bb->getParent(); 1093 if (!caller) 1094 continue; 1095 1096 // If a caller has already been seen, then the caller is 1097 // appearing in the module before the callee. so print out 1098 // a declaration for the callee. 1099 if (seenMap.find(caller) != seenMap.end()) { 1100 emitDeclaration(F, O); 1101 break; 1102 } 1103 } 1104 seenMap[F] = true; 1105 } 1106 } 1107 1108 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 1109 DebugInfoFinder DbgFinder; 1110 DbgFinder.processModule(M); 1111 1112 unsigned i = 1; 1113 for (DICompileUnit DIUnit : DbgFinder.compile_units()) { 1114 StringRef Filename(DIUnit.getFilename()); 1115 StringRef Dirname(DIUnit.getDirectory()); 1116 SmallString<128> FullPathName = Dirname; 1117 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 1118 sys::path::append(FullPathName, Filename); 1119 Filename = FullPathName.str(); 1120 } 1121 if (filenameMap.find(Filename.str()) != filenameMap.end()) 1122 continue; 1123 filenameMap[Filename.str()] = i; 1124 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 1125 ++i; 1126 } 1127 1128 for (DISubprogram SP : DbgFinder.subprograms()) { 1129 StringRef Filename(SP.getFilename()); 1130 StringRef Dirname(SP.getDirectory()); 1131 SmallString<128> FullPathName = Dirname; 1132 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 1133 sys::path::append(FullPathName, Filename); 1134 Filename = FullPathName.str(); 1135 } 1136 if (filenameMap.find(Filename.str()) != filenameMap.end()) 1137 continue; 1138 filenameMap[Filename.str()] = i; 1139 ++i; 1140 } 1141 } 1142 1143 bool NVPTXAsmPrinter::doInitialization(Module &M) { 1144 1145 SmallString<128> Str1; 1146 raw_svector_ostream OS1(Str1); 1147 1148 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 1149 MMI->AnalyzeModule(M); 1150 1151 // We need to call the parent's one explicitly. 1152 //bool Result = AsmPrinter::doInitialization(M); 1153 1154 // Initialize TargetLoweringObjectFile. 1155 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 1156 .Initialize(OutContext, TM); 1157 1158 Mang = new Mangler(TM.getDataLayout()); 1159 1160 // Emit header before any dwarf directives are emitted below. 1161 emitHeader(M, OS1); 1162 OutStreamer.EmitRawText(OS1.str()); 1163 1164 // Already commented out 1165 //bool Result = AsmPrinter::doInitialization(M); 1166 1167 // Emit module-level inline asm if it exists. 1168 if (!M.getModuleInlineAsm().empty()) { 1169 OutStreamer.AddComment("Start of file scope inline assembly"); 1170 OutStreamer.AddBlankLine(); 1171 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 1172 OutStreamer.AddBlankLine(); 1173 OutStreamer.AddComment("End of file scope inline assembly"); 1174 OutStreamer.AddBlankLine(); 1175 } 1176 1177 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 1178 recordAndEmitFilenames(M); 1179 1180 GlobalsEmitted = false; 1181 1182 return false; // success 1183 } 1184 1185 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 1186 SmallString<128> Str2; 1187 raw_svector_ostream OS2(Str2); 1188 1189 emitDeclarations(M, OS2); 1190 1191 // As ptxas does not support forward references of globals, we need to first 1192 // sort the list of module-level globals in def-use order. We visit each 1193 // global variable in order, and ensure that we emit it *after* its dependent 1194 // globals. We use a little extra memory maintaining both a set and a list to 1195 // have fast searches while maintaining a strict ordering. 1196 SmallVector<const GlobalVariable *, 8> Globals; 1197 DenseSet<const GlobalVariable *> GVVisited; 1198 DenseSet<const GlobalVariable *> GVVisiting; 1199 1200 // Visit each global variable, in order 1201 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 1202 I != E; ++I) 1203 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 1204 1205 assert(GVVisited.size() == M.getGlobalList().size() && 1206 "Missed a global variable"); 1207 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 1208 1209 // Print out module-level global variables in proper order 1210 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 1211 printModuleLevelGV(Globals[i], OS2); 1212 1213 OS2 << '\n'; 1214 1215 OutStreamer.EmitRawText(OS2.str()); 1216 } 1217 1218 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 1219 O << "//\n"; 1220 O << "// Generated by LLVM NVPTX Back-End\n"; 1221 O << "//\n"; 1222 O << "\n"; 1223 1224 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 1225 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 1226 1227 O << ".target "; 1228 O << nvptxSubtarget.getTargetName(); 1229 1230 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 1231 O << ", texmode_independent"; 1232 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1233 if (!nvptxSubtarget.hasDouble()) 1234 O << ", map_f64_to_f32"; 1235 } 1236 1237 if (MAI->doesSupportDebugInformation()) 1238 O << ", debug"; 1239 1240 O << "\n"; 1241 1242 O << ".address_size "; 1243 if (nvptxSubtarget.is64Bit()) 1244 O << "64"; 1245 else 1246 O << "32"; 1247 O << "\n"; 1248 1249 O << "\n"; 1250 } 1251 1252 bool NVPTXAsmPrinter::doFinalization(Module &M) { 1253 1254 // If we did not emit any functions, then the global declarations have not 1255 // yet been emitted. 1256 if (!GlobalsEmitted) { 1257 emitGlobals(M); 1258 GlobalsEmitted = true; 1259 } 1260 1261 // XXX Temproarily remove global variables so that doFinalization() will not 1262 // emit them again (global variables are emitted at beginning). 1263 1264 Module::GlobalListType &global_list = M.getGlobalList(); 1265 int i, n = global_list.size(); 1266 GlobalVariable **gv_array = new GlobalVariable *[n]; 1267 1268 // first, back-up GlobalVariable in gv_array 1269 i = 0; 1270 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1271 I != E; ++I) 1272 gv_array[i++] = &*I; 1273 1274 // second, empty global_list 1275 while (!global_list.empty()) 1276 global_list.remove(global_list.begin()); 1277 1278 // call doFinalization 1279 bool ret = AsmPrinter::doFinalization(M); 1280 1281 // now we restore global variables 1282 for (i = 0; i < n; i++) 1283 global_list.insert(global_list.end(), gv_array[i]); 1284 1285 clearAnnotationCache(&M); 1286 1287 delete[] gv_array; 1288 return ret; 1289 1290 //bool Result = AsmPrinter::doFinalization(M); 1291 // Instead of calling the parents doFinalization, we may 1292 // clone parents doFinalization and customize here. 1293 // Currently, we if NVISA out the EmitGlobals() in 1294 // parent's doFinalization, which is too intrusive. 1295 // 1296 // Same for the doInitialization. 1297 //return Result; 1298 } 1299 1300 // This function emits appropriate linkage directives for 1301 // functions and global variables. 1302 // 1303 // extern function declaration -> .extern 1304 // extern function definition -> .visible 1305 // external global variable with init -> .visible 1306 // external without init -> .extern 1307 // appending -> not allowed, assert. 1308 // for any linkage other than 1309 // internal, private, linker_private, 1310 // linker_private_weak, linker_private_weak_def_auto, 1311 // we emit -> .weak. 1312 1313 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1314 raw_ostream &O) { 1315 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1316 if (V->hasExternalLinkage()) { 1317 if (isa<GlobalVariable>(V)) { 1318 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1319 if (GVar) { 1320 if (GVar->hasInitializer()) 1321 O << ".visible "; 1322 else 1323 O << ".extern "; 1324 } 1325 } else if (V->isDeclaration()) 1326 O << ".extern "; 1327 else 1328 O << ".visible "; 1329 } else if (V->hasAppendingLinkage()) { 1330 std::string msg; 1331 msg.append("Error: "); 1332 msg.append("Symbol "); 1333 if (V->hasName()) 1334 msg.append(V->getName().str()); 1335 msg.append("has unsupported appending linkage type"); 1336 llvm_unreachable(msg.c_str()); 1337 } else if (!V->hasInternalLinkage() && 1338 !V->hasPrivateLinkage()) { 1339 O << ".weak "; 1340 } 1341 } 1342 } 1343 1344 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1345 raw_ostream &O, 1346 bool processDemoted) { 1347 1348 // Skip meta data 1349 if (GVar->hasSection()) { 1350 if (GVar->getSection() == StringRef("llvm.metadata")) 1351 return; 1352 } 1353 1354 // Skip LLVM intrinsic global variables 1355 if (GVar->getName().startswith("llvm.") || 1356 GVar->getName().startswith("nvvm.")) 1357 return; 1358 1359 const DataLayout *TD = TM.getDataLayout(); 1360 1361 // GlobalVariables are always constant pointers themselves. 1362 const PointerType *PTy = GVar->getType(); 1363 Type *ETy = PTy->getElementType(); 1364 1365 if (GVar->hasExternalLinkage()) { 1366 if (GVar->hasInitializer()) 1367 O << ".visible "; 1368 else 1369 O << ".extern "; 1370 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1371 GVar->hasAvailableExternallyLinkage() || 1372 GVar->hasCommonLinkage()) { 1373 O << ".weak "; 1374 } 1375 1376 if (llvm::isTexture(*GVar)) { 1377 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1378 return; 1379 } 1380 1381 if (llvm::isSurface(*GVar)) { 1382 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1383 return; 1384 } 1385 1386 if (GVar->isDeclaration()) { 1387 // (extern) declarations, no definition or initializer 1388 // Currently the only known declaration is for an automatic __local 1389 // (.shared) promoted to global. 1390 emitPTXGlobalVariable(GVar, O); 1391 O << ";\n"; 1392 return; 1393 } 1394 1395 if (llvm::isSampler(*GVar)) { 1396 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1397 1398 const Constant *Initializer = nullptr; 1399 if (GVar->hasInitializer()) 1400 Initializer = GVar->getInitializer(); 1401 const ConstantInt *CI = nullptr; 1402 if (Initializer) 1403 CI = dyn_cast<ConstantInt>(Initializer); 1404 if (CI) { 1405 unsigned sample = CI->getZExtValue(); 1406 1407 O << " = { "; 1408 1409 for (int i = 0, 1410 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1411 i < 3; i++) { 1412 O << "addr_mode_" << i << " = "; 1413 switch (addr) { 1414 case 0: 1415 O << "wrap"; 1416 break; 1417 case 1: 1418 O << "clamp_to_border"; 1419 break; 1420 case 2: 1421 O << "clamp_to_edge"; 1422 break; 1423 case 3: 1424 O << "wrap"; 1425 break; 1426 case 4: 1427 O << "mirror"; 1428 break; 1429 } 1430 O << ", "; 1431 } 1432 O << "filter_mode = "; 1433 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1434 case 0: 1435 O << "nearest"; 1436 break; 1437 case 1: 1438 O << "linear"; 1439 break; 1440 case 2: 1441 llvm_unreachable("Anisotropic filtering is not supported"); 1442 default: 1443 O << "nearest"; 1444 break; 1445 } 1446 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1447 O << ", force_unnormalized_coords = 1"; 1448 } 1449 O << " }"; 1450 } 1451 1452 O << ";\n"; 1453 return; 1454 } 1455 1456 if (GVar->hasPrivateLinkage()) { 1457 1458 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1459 return; 1460 1461 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1462 if (!strncmp(GVar->getName().data(), "filename", 8)) 1463 return; 1464 if (GVar->use_empty()) 1465 return; 1466 } 1467 1468 const Function *demotedFunc = nullptr; 1469 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1470 O << "// " << GVar->getName().str() << " has been demoted\n"; 1471 if (localDecls.find(demotedFunc) != localDecls.end()) 1472 localDecls[demotedFunc].push_back(GVar); 1473 else { 1474 std::vector<const GlobalVariable *> temp; 1475 temp.push_back(GVar); 1476 localDecls[demotedFunc] = temp; 1477 } 1478 return; 1479 } 1480 1481 O << "."; 1482 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1483 1484 if (isManaged(*GVar)) { 1485 O << " .attribute(.managed)"; 1486 } 1487 1488 if (GVar->getAlignment() == 0) 1489 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1490 else 1491 O << " .align " << GVar->getAlignment(); 1492 1493 if (ETy->isSingleValueType()) { 1494 O << " ."; 1495 // Special case: ABI requires that we use .u8 for predicates 1496 if (ETy->isIntegerTy(1)) 1497 O << "u8"; 1498 else 1499 O << getPTXFundamentalTypeStr(ETy, false); 1500 O << " "; 1501 O << *getSymbol(GVar); 1502 1503 // Ptx allows variable initilization only for constant and global state 1504 // spaces. 1505 if (GVar->hasInitializer()) { 1506 if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1507 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { 1508 const Constant *Initializer = GVar->getInitializer(); 1509 // 'undef' is treated as there is no value spefied. 1510 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1511 O << " = "; 1512 printScalarConstant(Initializer, O); 1513 } 1514 } else { 1515 // The frontend adds zero-initializer to variables that don't have an 1516 // initial value, so skip warning for this case. 1517 if (!GVar->getInitializer()->isNullValue()) { 1518 std::string warnMsg = "initial value of '" + GVar->getName().str() + 1519 "' is not allowed in addrspace(" + 1520 llvm::utostr_32(PTy->getAddressSpace()) + ")"; 1521 report_fatal_error(warnMsg.c_str()); 1522 } 1523 } 1524 } 1525 } else { 1526 unsigned int ElementSize = 0; 1527 1528 // Although PTX has direct support for struct type and array type and 1529 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1530 // targets that support these high level field accesses. Structs, arrays 1531 // and vectors are lowered into arrays of bytes. 1532 switch (ETy->getTypeID()) { 1533 case Type::StructTyID: 1534 case Type::ArrayTyID: 1535 case Type::VectorTyID: 1536 ElementSize = TD->getTypeStoreSize(ETy); 1537 // Ptx allows variable initilization only for constant and 1538 // global state spaces. 1539 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1540 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1541 GVar->hasInitializer()) { 1542 const Constant *Initializer = GVar->getInitializer(); 1543 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1544 AggBuffer aggBuffer(ElementSize, O, *this); 1545 bufferAggregateConstant(Initializer, &aggBuffer); 1546 if (aggBuffer.numSymbols) { 1547 if (nvptxSubtarget.is64Bit()) { 1548 O << " .u64 " << *getSymbol(GVar) << "["; 1549 O << ElementSize / 8; 1550 } else { 1551 O << " .u32 " << *getSymbol(GVar) << "["; 1552 O << ElementSize / 4; 1553 } 1554 O << "]"; 1555 } else { 1556 O << " .b8 " << *getSymbol(GVar) << "["; 1557 O << ElementSize; 1558 O << "]"; 1559 } 1560 O << " = {"; 1561 aggBuffer.print(); 1562 O << "}"; 1563 } else { 1564 O << " .b8 " << *getSymbol(GVar); 1565 if (ElementSize) { 1566 O << "["; 1567 O << ElementSize; 1568 O << "]"; 1569 } 1570 } 1571 } else { 1572 O << " .b8 " << *getSymbol(GVar); 1573 if (ElementSize) { 1574 O << "["; 1575 O << ElementSize; 1576 O << "]"; 1577 } 1578 } 1579 break; 1580 default: 1581 llvm_unreachable("type not supported yet"); 1582 } 1583 1584 } 1585 O << ";\n"; 1586 } 1587 1588 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1589 if (localDecls.find(f) == localDecls.end()) 1590 return; 1591 1592 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1593 1594 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1595 O << "\t// demoted variable\n\t"; 1596 printModuleLevelGV(gvars[i], O, true); 1597 } 1598 } 1599 1600 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1601 raw_ostream &O) const { 1602 switch (AddressSpace) { 1603 case llvm::ADDRESS_SPACE_LOCAL: 1604 O << "local"; 1605 break; 1606 case llvm::ADDRESS_SPACE_GLOBAL: 1607 O << "global"; 1608 break; 1609 case llvm::ADDRESS_SPACE_CONST: 1610 O << "const"; 1611 break; 1612 case llvm::ADDRESS_SPACE_SHARED: 1613 O << "shared"; 1614 break; 1615 default: 1616 report_fatal_error("Bad address space found while emitting PTX"); 1617 break; 1618 } 1619 } 1620 1621 std::string 1622 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1623 switch (Ty->getTypeID()) { 1624 default: 1625 llvm_unreachable("unexpected type"); 1626 break; 1627 case Type::IntegerTyID: { 1628 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1629 if (NumBits == 1) 1630 return "pred"; 1631 else if (NumBits <= 64) { 1632 std::string name = "u"; 1633 return name + utostr(NumBits); 1634 } else { 1635 llvm_unreachable("Integer too large"); 1636 break; 1637 } 1638 break; 1639 } 1640 case Type::FloatTyID: 1641 return "f32"; 1642 case Type::DoubleTyID: 1643 return "f64"; 1644 case Type::PointerTyID: 1645 if (nvptxSubtarget.is64Bit()) 1646 if (useB4PTR) 1647 return "b64"; 1648 else 1649 return "u64"; 1650 else if (useB4PTR) 1651 return "b32"; 1652 else 1653 return "u32"; 1654 } 1655 llvm_unreachable("unexpected type"); 1656 return nullptr; 1657 } 1658 1659 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1660 raw_ostream &O) { 1661 1662 const DataLayout *TD = TM.getDataLayout(); 1663 1664 // GlobalVariables are always constant pointers themselves. 1665 const PointerType *PTy = GVar->getType(); 1666 Type *ETy = PTy->getElementType(); 1667 1668 O << "."; 1669 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1670 if (GVar->getAlignment() == 0) 1671 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1672 else 1673 O << " .align " << GVar->getAlignment(); 1674 1675 if (ETy->isSingleValueType()) { 1676 O << " ."; 1677 O << getPTXFundamentalTypeStr(ETy); 1678 O << " "; 1679 O << *getSymbol(GVar); 1680 return; 1681 } 1682 1683 int64_t ElementSize = 0; 1684 1685 // Although PTX has direct support for struct type and array type and LLVM IR 1686 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1687 // support these high level field accesses. Structs and arrays are lowered 1688 // into arrays of bytes. 1689 switch (ETy->getTypeID()) { 1690 case Type::StructTyID: 1691 case Type::ArrayTyID: 1692 case Type::VectorTyID: 1693 ElementSize = TD->getTypeStoreSize(ETy); 1694 O << " .b8 " << *getSymbol(GVar) << "["; 1695 if (ElementSize) { 1696 O << itostr(ElementSize); 1697 } 1698 O << "]"; 1699 break; 1700 default: 1701 llvm_unreachable("type not supported yet"); 1702 } 1703 return; 1704 } 1705 1706 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1707 if (Ty->isSingleValueType()) 1708 return TD->getPrefTypeAlignment(Ty); 1709 1710 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1711 if (ATy) 1712 return getOpenCLAlignment(TD, ATy->getElementType()); 1713 1714 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1715 if (VTy) { 1716 Type *ETy = VTy->getElementType(); 1717 unsigned int numE = VTy->getNumElements(); 1718 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1719 if (numE == 3) 1720 return 4 * alignE; 1721 else 1722 return numE * alignE; 1723 } 1724 1725 const StructType *STy = dyn_cast<StructType>(Ty); 1726 if (STy) { 1727 unsigned int alignStruct = 1; 1728 // Go through each element of the struct and find the 1729 // largest alignment. 1730 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1731 Type *ETy = STy->getElementType(i); 1732 unsigned int align = getOpenCLAlignment(TD, ETy); 1733 if (align > alignStruct) 1734 alignStruct = align; 1735 } 1736 return alignStruct; 1737 } 1738 1739 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1740 if (FTy) 1741 return TD->getPointerPrefAlignment(); 1742 return TD->getPrefTypeAlignment(Ty); 1743 } 1744 1745 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1746 int paramIndex, raw_ostream &O) { 1747 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1748 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1749 O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 1750 else { 1751 std::string argName = I->getName(); 1752 const char *p = argName.c_str(); 1753 while (*p) { 1754 if (*p == '.') 1755 O << "_"; 1756 else 1757 O << *p; 1758 p++; 1759 } 1760 } 1761 } 1762 1763 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1764 Function::const_arg_iterator I, E; 1765 int i = 0; 1766 1767 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1768 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1769 O << *CurrentFnSym << "_param_" << paramIndex; 1770 return; 1771 } 1772 1773 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1774 if (i == paramIndex) { 1775 printParamName(I, paramIndex, O); 1776 return; 1777 } 1778 } 1779 llvm_unreachable("paramIndex out of bound"); 1780 } 1781 1782 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1783 const DataLayout *TD = TM.getDataLayout(); 1784 const AttributeSet &PAL = F->getAttributes(); 1785 const TargetLowering *TLI = TM.getTargetLowering(); 1786 Function::const_arg_iterator I, E; 1787 unsigned paramIndex = 0; 1788 bool first = true; 1789 bool isKernelFunc = llvm::isKernelFunction(*F); 1790 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1791 MVT thePointerTy = TLI->getPointerTy(); 1792 1793 O << "(\n"; 1794 1795 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1796 Type *Ty = I->getType(); 1797 1798 if (!first) 1799 O << ",\n"; 1800 1801 first = false; 1802 1803 // Handle image/sampler parameters 1804 if (isKernelFunction(*F)) { 1805 if (isSampler(*I) || isImage(*I)) { 1806 if (isImage(*I)) { 1807 std::string sname = I->getName(); 1808 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1809 if (nvptxSubtarget.hasImageHandles()) 1810 O << "\t.param .u64 .ptr .surfref "; 1811 else 1812 O << "\t.param .surfref "; 1813 O << *CurrentFnSym << "_param_" << paramIndex; 1814 } 1815 else { // Default image is read_only 1816 if (nvptxSubtarget.hasImageHandles()) 1817 O << "\t.param .u64 .ptr .texref "; 1818 else 1819 O << "\t.param .texref "; 1820 O << *CurrentFnSym << "_param_" << paramIndex; 1821 } 1822 } else { 1823 if (nvptxSubtarget.hasImageHandles()) 1824 O << "\t.param .u64 .ptr .samplerref "; 1825 else 1826 O << "\t.param .samplerref "; 1827 O << *CurrentFnSym << "_param_" << paramIndex; 1828 } 1829 continue; 1830 } 1831 } 1832 1833 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1834 if (Ty->isAggregateType() || Ty->isVectorTy()) { 1835 // Just print .param .align <a> .b8 .param[size]; 1836 // <a> = PAL.getparamalignment 1837 // size = typeallocsize of element type 1838 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1839 if (align == 0) 1840 align = TD->getABITypeAlignment(Ty); 1841 1842 unsigned sz = TD->getTypeAllocSize(Ty); 1843 O << "\t.param .align " << align << " .b8 "; 1844 printParamName(I, paramIndex, O); 1845 O << "[" << sz << "]"; 1846 1847 continue; 1848 } 1849 // Just a scalar 1850 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1851 if (isKernelFunc) { 1852 if (PTy) { 1853 // Special handling for pointer arguments to kernel 1854 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1855 1856 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1857 Type *ETy = PTy->getElementType(); 1858 int addrSpace = PTy->getAddressSpace(); 1859 switch (addrSpace) { 1860 default: 1861 O << ".ptr "; 1862 break; 1863 case llvm::ADDRESS_SPACE_CONST: 1864 O << ".ptr .const "; 1865 break; 1866 case llvm::ADDRESS_SPACE_SHARED: 1867 O << ".ptr .shared "; 1868 break; 1869 case llvm::ADDRESS_SPACE_GLOBAL: 1870 O << ".ptr .global "; 1871 break; 1872 } 1873 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1874 } 1875 printParamName(I, paramIndex, O); 1876 continue; 1877 } 1878 1879 // non-pointer scalar to kernel func 1880 O << "\t.param ."; 1881 // Special case: predicate operands become .u8 types 1882 if (Ty->isIntegerTy(1)) 1883 O << "u8"; 1884 else 1885 O << getPTXFundamentalTypeStr(Ty); 1886 O << " "; 1887 printParamName(I, paramIndex, O); 1888 continue; 1889 } 1890 // Non-kernel function, just print .param .b<size> for ABI 1891 // and .reg .b<size> for non-ABI 1892 unsigned sz = 0; 1893 if (isa<IntegerType>(Ty)) { 1894 sz = cast<IntegerType>(Ty)->getBitWidth(); 1895 if (sz < 32) 1896 sz = 32; 1897 } else if (isa<PointerType>(Ty)) 1898 sz = thePointerTy.getSizeInBits(); 1899 else 1900 sz = Ty->getPrimitiveSizeInBits(); 1901 if (isABI) 1902 O << "\t.param .b" << sz << " "; 1903 else 1904 O << "\t.reg .b" << sz << " "; 1905 printParamName(I, paramIndex, O); 1906 continue; 1907 } 1908 1909 // param has byVal attribute. So should be a pointer 1910 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1911 assert(PTy && "Param with byval attribute should be a pointer type"); 1912 Type *ETy = PTy->getElementType(); 1913 1914 if (isABI || isKernelFunc) { 1915 // Just print .param .align <a> .b8 .param[size]; 1916 // <a> = PAL.getparamalignment 1917 // size = typeallocsize of element type 1918 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1919 if (align == 0) 1920 align = TD->getABITypeAlignment(ETy); 1921 1922 unsigned sz = TD->getTypeAllocSize(ETy); 1923 O << "\t.param .align " << align << " .b8 "; 1924 printParamName(I, paramIndex, O); 1925 O << "[" << sz << "]"; 1926 continue; 1927 } else { 1928 // Split the ETy into constituent parts and 1929 // print .param .b<size> <name> for each part. 1930 // Further, if a part is vector, print the above for 1931 // each vector element. 1932 SmallVector<EVT, 16> vtparts; 1933 ComputeValueVTs(*TLI, ETy, vtparts); 1934 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1935 unsigned elems = 1; 1936 EVT elemtype = vtparts[i]; 1937 if (vtparts[i].isVector()) { 1938 elems = vtparts[i].getVectorNumElements(); 1939 elemtype = vtparts[i].getVectorElementType(); 1940 } 1941 1942 for (unsigned j = 0, je = elems; j != je; ++j) { 1943 unsigned sz = elemtype.getSizeInBits(); 1944 if (elemtype.isInteger() && (sz < 32)) 1945 sz = 32; 1946 O << "\t.reg .b" << sz << " "; 1947 printParamName(I, paramIndex, O); 1948 if (j < je - 1) 1949 O << ",\n"; 1950 ++paramIndex; 1951 } 1952 if (i < e - 1) 1953 O << ",\n"; 1954 } 1955 --paramIndex; 1956 continue; 1957 } 1958 } 1959 1960 O << "\n)\n"; 1961 } 1962 1963 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1964 raw_ostream &O) { 1965 const Function *F = MF.getFunction(); 1966 emitFunctionParamList(F, O); 1967 } 1968 1969 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1970 const MachineFunction &MF) { 1971 SmallString<128> Str; 1972 raw_svector_ostream O(Str); 1973 1974 // Map the global virtual register number to a register class specific 1975 // virtual register number starting from 1 with that class. 1976 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1977 //unsigned numRegClasses = TRI->getNumRegClasses(); 1978 1979 // Emit the Fake Stack Object 1980 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1981 int NumBytes = (int) MFI->getStackSize(); 1982 if (NumBytes) { 1983 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1984 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1985 if (nvptxSubtarget.is64Bit()) { 1986 O << "\t.reg .b64 \t%SP;\n"; 1987 O << "\t.reg .b64 \t%SPL;\n"; 1988 } else { 1989 O << "\t.reg .b32 \t%SP;\n"; 1990 O << "\t.reg .b32 \t%SPL;\n"; 1991 } 1992 } 1993 1994 // Go through all virtual registers to establish the mapping between the 1995 // global virtual 1996 // register number and the per class virtual register number. 1997 // We use the per class virtual register number in the ptx output. 1998 unsigned int numVRs = MRI->getNumVirtRegs(); 1999 for (unsigned i = 0; i < numVRs; i++) { 2000 unsigned int vr = TRI->index2VirtReg(i); 2001 const TargetRegisterClass *RC = MRI->getRegClass(vr); 2002 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 2003 int n = regmap.size(); 2004 regmap.insert(std::make_pair(vr, n + 1)); 2005 } 2006 2007 // Emit register declarations 2008 // @TODO: Extract out the real register usage 2009 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 2010 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 2011 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 2012 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 2013 // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 2014 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 2015 // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 2016 2017 // Emit declaration of the virtual registers or 'physical' registers for 2018 // each register class 2019 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 2020 const TargetRegisterClass *RC = TRI->getRegClass(i); 2021 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 2022 std::string rcname = getNVPTXRegClassName(RC); 2023 std::string rcStr = getNVPTXRegClassStr(RC); 2024 int n = regmap.size(); 2025 2026 // Only declare those registers that may be used. 2027 if (n) { 2028 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 2029 << ">;\n"; 2030 } 2031 } 2032 2033 OutStreamer.EmitRawText(O.str()); 2034 } 2035 2036 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 2037 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 2038 bool ignored; 2039 unsigned int numHex; 2040 const char *lead; 2041 2042 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 2043 numHex = 8; 2044 lead = "0f"; 2045 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 2046 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 2047 numHex = 16; 2048 lead = "0d"; 2049 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 2050 } else 2051 llvm_unreachable("unsupported fp type"); 2052 2053 APInt API = APF.bitcastToAPInt(); 2054 std::string hexstr(utohexstr(API.getZExtValue())); 2055 O << lead; 2056 if (hexstr.length() < numHex) 2057 O << std::string(numHex - hexstr.length(), '0'); 2058 O << utohexstr(API.getZExtValue()); 2059 } 2060 2061 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 2062 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 2063 O << CI->getValue(); 2064 return; 2065 } 2066 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 2067 printFPConstant(CFP, O); 2068 return; 2069 } 2070 if (isa<ConstantPointerNull>(CPV)) { 2071 O << "0"; 2072 return; 2073 } 2074 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 2075 PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); 2076 bool IsNonGenericPointer = false; 2077 if (PTy && PTy->getAddressSpace() != 0) { 2078 IsNonGenericPointer = true; 2079 } 2080 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 2081 O << "generic("; 2082 O << *getSymbol(GVar); 2083 O << ")"; 2084 } else { 2085 O << *getSymbol(GVar); 2086 } 2087 return; 2088 } 2089 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 2090 const Value *v = Cexpr->stripPointerCasts(); 2091 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 2092 bool IsNonGenericPointer = false; 2093 if (PTy && PTy->getAddressSpace() != 0) { 2094 IsNonGenericPointer = true; 2095 } 2096 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 2097 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 2098 O << "generic("; 2099 O << *getSymbol(GVar); 2100 O << ")"; 2101 } else { 2102 O << *getSymbol(GVar); 2103 } 2104 return; 2105 } else { 2106 O << *LowerConstant(CPV, *this); 2107 return; 2108 } 2109 } 2110 llvm_unreachable("Not scalar type found in printScalarConstant()"); 2111 } 2112 2113 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 2114 AggBuffer *aggBuffer) { 2115 2116 const DataLayout *TD = TM.getDataLayout(); 2117 2118 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 2119 int s = TD->getTypeAllocSize(CPV->getType()); 2120 if (s < Bytes) 2121 s = Bytes; 2122 aggBuffer->addZeros(s); 2123 return; 2124 } 2125 2126 unsigned char *ptr; 2127 switch (CPV->getType()->getTypeID()) { 2128 2129 case Type::IntegerTyID: { 2130 const Type *ETy = CPV->getType(); 2131 if (ETy == Type::getInt8Ty(CPV->getContext())) { 2132 unsigned char c = 2133 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 2134 ptr = &c; 2135 aggBuffer->addBytes(ptr, 1, Bytes); 2136 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 2137 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 2138 ptr = (unsigned char *)&int16; 2139 aggBuffer->addBytes(ptr, 2, Bytes); 2140 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 2141 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 2142 int int32 = (int)(constInt->getZExtValue()); 2143 ptr = (unsigned char *)&int32; 2144 aggBuffer->addBytes(ptr, 4, Bytes); 2145 break; 2146 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 2147 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 2148 ConstantFoldConstantExpression(Cexpr, TD))) { 2149 int int32 = (int)(constInt->getZExtValue()); 2150 ptr = (unsigned char *)&int32; 2151 aggBuffer->addBytes(ptr, 4, Bytes); 2152 break; 2153 } 2154 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 2155 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 2156 aggBuffer->addSymbol(v); 2157 aggBuffer->addZeros(4); 2158 break; 2159 } 2160 } 2161 llvm_unreachable("unsupported integer const type"); 2162 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 2163 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 2164 long long int64 = (long long)(constInt->getZExtValue()); 2165 ptr = (unsigned char *)&int64; 2166 aggBuffer->addBytes(ptr, 8, Bytes); 2167 break; 2168 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 2169 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 2170 ConstantFoldConstantExpression(Cexpr, TD))) { 2171 long long int64 = (long long)(constInt->getZExtValue()); 2172 ptr = (unsigned char *)&int64; 2173 aggBuffer->addBytes(ptr, 8, Bytes); 2174 break; 2175 } 2176 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 2177 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 2178 aggBuffer->addSymbol(v); 2179 aggBuffer->addZeros(8); 2180 break; 2181 } 2182 } 2183 llvm_unreachable("unsupported integer const type"); 2184 } else 2185 llvm_unreachable("unsupported integer const type"); 2186 break; 2187 } 2188 case Type::FloatTyID: 2189 case Type::DoubleTyID: { 2190 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 2191 const Type *Ty = CFP->getType(); 2192 if (Ty == Type::getFloatTy(CPV->getContext())) { 2193 float float32 = (float) CFP->getValueAPF().convertToFloat(); 2194 ptr = (unsigned char *)&float32; 2195 aggBuffer->addBytes(ptr, 4, Bytes); 2196 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 2197 double float64 = CFP->getValueAPF().convertToDouble(); 2198 ptr = (unsigned char *)&float64; 2199 aggBuffer->addBytes(ptr, 8, Bytes); 2200 } else { 2201 llvm_unreachable("unsupported fp const type"); 2202 } 2203 break; 2204 } 2205 case Type::PointerTyID: { 2206 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 2207 aggBuffer->addSymbol(GVar); 2208 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 2209 const Value *v = Cexpr->stripPointerCasts(); 2210 aggBuffer->addSymbol(v); 2211 } 2212 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 2213 aggBuffer->addZeros(s); 2214 break; 2215 } 2216 2217 case Type::ArrayTyID: 2218 case Type::VectorTyID: 2219 case Type::StructTyID: { 2220 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 2221 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 2222 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 2223 bufferAggregateConstant(CPV, aggBuffer); 2224 if (Bytes > ElementSize) 2225 aggBuffer->addZeros(Bytes - ElementSize); 2226 } else if (isa<ConstantAggregateZero>(CPV)) 2227 aggBuffer->addZeros(Bytes); 2228 else 2229 llvm_unreachable("Unexpected Constant type"); 2230 break; 2231 } 2232 2233 default: 2234 llvm_unreachable("unsupported type"); 2235 } 2236 } 2237 2238 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 2239 AggBuffer *aggBuffer) { 2240 const DataLayout *TD = TM.getDataLayout(); 2241 int Bytes; 2242 2243 // Old constants 2244 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 2245 if (CPV->getNumOperands()) 2246 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 2247 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 2248 return; 2249 } 2250 2251 if (const ConstantDataSequential *CDS = 2252 dyn_cast<ConstantDataSequential>(CPV)) { 2253 if (CDS->getNumElements()) 2254 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 2255 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 2256 aggBuffer); 2257 return; 2258 } 2259 2260 if (isa<ConstantStruct>(CPV)) { 2261 if (CPV->getNumOperands()) { 2262 StructType *ST = cast<StructType>(CPV->getType()); 2263 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 2264 if (i == (e - 1)) 2265 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 2266 TD->getTypeAllocSize(ST) - 2267 TD->getStructLayout(ST)->getElementOffset(i); 2268 else 2269 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 2270 TD->getStructLayout(ST)->getElementOffset(i); 2271 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 2272 } 2273 } 2274 return; 2275 } 2276 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 2277 } 2278 2279 // buildTypeNameMap - Run through symbol table looking for type names. 2280 // 2281 2282 bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 2283 2284 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 2285 2286 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 2287 !PI->second.compare("struct._image2d_t") || 2288 !PI->second.compare("struct._image3d_t"))) 2289 return true; 2290 2291 return false; 2292 } 2293 2294 2295 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 2296 switch (MI.getOpcode()) { 2297 default: 2298 return false; 2299 case NVPTX::CallArgBeginInst: 2300 case NVPTX::CallArgEndInst0: 2301 case NVPTX::CallArgEndInst1: 2302 case NVPTX::CallArgF32: 2303 case NVPTX::CallArgF64: 2304 case NVPTX::CallArgI16: 2305 case NVPTX::CallArgI32: 2306 case NVPTX::CallArgI32imm: 2307 case NVPTX::CallArgI64: 2308 case NVPTX::CallArgParam: 2309 case NVPTX::CallVoidInst: 2310 case NVPTX::CallVoidInstReg: 2311 case NVPTX::Callseq_End: 2312 case NVPTX::CallVoidInstReg64: 2313 case NVPTX::DeclareParamInst: 2314 case NVPTX::DeclareRetMemInst: 2315 case NVPTX::DeclareRetRegInst: 2316 case NVPTX::DeclareRetScalarInst: 2317 case NVPTX::DeclareScalarParamInst: 2318 case NVPTX::DeclareScalarRegInst: 2319 case NVPTX::StoreParamF32: 2320 case NVPTX::StoreParamF64: 2321 case NVPTX::StoreParamI16: 2322 case NVPTX::StoreParamI32: 2323 case NVPTX::StoreParamI64: 2324 case NVPTX::StoreParamI8: 2325 case NVPTX::StoreRetvalF32: 2326 case NVPTX::StoreRetvalF64: 2327 case NVPTX::StoreRetvalI16: 2328 case NVPTX::StoreRetvalI32: 2329 case NVPTX::StoreRetvalI64: 2330 case NVPTX::StoreRetvalI8: 2331 case NVPTX::LastCallArgF32: 2332 case NVPTX::LastCallArgF64: 2333 case NVPTX::LastCallArgI16: 2334 case NVPTX::LastCallArgI32: 2335 case NVPTX::LastCallArgI32imm: 2336 case NVPTX::LastCallArgI64: 2337 case NVPTX::LastCallArgParam: 2338 case NVPTX::LoadParamMemF32: 2339 case NVPTX::LoadParamMemF64: 2340 case NVPTX::LoadParamMemI16: 2341 case NVPTX::LoadParamMemI32: 2342 case NVPTX::LoadParamMemI64: 2343 case NVPTX::LoadParamMemI8: 2344 case NVPTX::PrototypeInst: 2345 case NVPTX::DBG_VALUE: 2346 return true; 2347 } 2348 return false; 2349 } 2350 2351 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2352 /// 2353 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2354 unsigned AsmVariant, 2355 const char *ExtraCode, raw_ostream &O) { 2356 if (ExtraCode && ExtraCode[0]) { 2357 if (ExtraCode[1] != 0) 2358 return true; // Unknown modifier. 2359 2360 switch (ExtraCode[0]) { 2361 default: 2362 // See if this is a generic print operand 2363 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2364 case 'r': 2365 break; 2366 } 2367 } 2368 2369 printOperand(MI, OpNo, O); 2370 2371 return false; 2372 } 2373 2374 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2375 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2376 const char *ExtraCode, raw_ostream &O) { 2377 if (ExtraCode && ExtraCode[0]) 2378 return true; // Unknown modifier 2379 2380 O << '['; 2381 printMemOperand(MI, OpNo, O); 2382 O << ']'; 2383 2384 return false; 2385 } 2386 2387 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2388 raw_ostream &O, const char *Modifier) { 2389 const MachineOperand &MO = MI->getOperand(opNum); 2390 switch (MO.getType()) { 2391 case MachineOperand::MO_Register: 2392 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2393 if (MO.getReg() == NVPTX::VRDepot) 2394 O << DEPOTNAME << getFunctionNumber(); 2395 else 2396 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2397 } else { 2398 emitVirtualRegister(MO.getReg(), O); 2399 } 2400 return; 2401 2402 case MachineOperand::MO_Immediate: 2403 if (!Modifier) 2404 O << MO.getImm(); 2405 else if (strstr(Modifier, "vec") == Modifier) 2406 printVecModifiedImmediate(MO, Modifier, O); 2407 else 2408 llvm_unreachable( 2409 "Don't know how to handle modifier on immediate operand"); 2410 return; 2411 2412 case MachineOperand::MO_FPImmediate: 2413 printFPConstant(MO.getFPImm(), O); 2414 break; 2415 2416 case MachineOperand::MO_GlobalAddress: 2417 O << *getSymbol(MO.getGlobal()); 2418 break; 2419 2420 case MachineOperand::MO_MachineBasicBlock: 2421 O << *MO.getMBB()->getSymbol(); 2422 return; 2423 2424 default: 2425 llvm_unreachable("Operand type not supported."); 2426 } 2427 } 2428 2429 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2430 raw_ostream &O, const char *Modifier) { 2431 printOperand(MI, opNum, O); 2432 2433 if (Modifier && !strcmp(Modifier, "add")) { 2434 O << ", "; 2435 printOperand(MI, opNum + 1, O); 2436 } else { 2437 if (MI->getOperand(opNum + 1).isImm() && 2438 MI->getOperand(opNum + 1).getImm() == 0) 2439 return; // don't print ',0' or '+0' 2440 O << "+"; 2441 printOperand(MI, opNum + 1, O); 2442 } 2443 } 2444 2445 2446 // Force static initialization. 2447 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2448 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2449 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2450 } 2451 2452 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2453 std::stringstream temp; 2454 LineReader *reader = this->getReader(filename.str()); 2455 temp << "\n//"; 2456 temp << filename.str(); 2457 temp << ":"; 2458 temp << line; 2459 temp << " "; 2460 temp << reader->readLine(line); 2461 temp << "\n"; 2462 this->OutStreamer.EmitRawText(Twine(temp.str())); 2463 } 2464 2465 LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2466 if (!reader) { 2467 reader = new LineReader(filename); 2468 } 2469 2470 if (reader->fileName() != filename) { 2471 delete reader; 2472 reader = new LineReader(filename); 2473 } 2474 2475 return reader; 2476 } 2477 2478 std::string LineReader::readLine(unsigned lineNum) { 2479 if (lineNum < theCurLine) { 2480 theCurLine = 0; 2481 fstr.seekg(0, std::ios::beg); 2482 } 2483 while (theCurLine < lineNum) { 2484 fstr.getline(buff, 500); 2485 theCurLine++; 2486 } 2487 return buff; 2488 } 2489 2490 // Force static initialization. 2491 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2492 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2493 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2494 } 2495