Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a printer that converts from our internal representation
     11 // of machine-dependent LLVM code to NVPTX assembly language.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "NVPTXAsmPrinter.h"
     16 #include "InstPrinter/NVPTXInstPrinter.h"
     17 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
     18 #include "NVPTX.h"
     19 #include "NVPTXInstrInfo.h"
     20 #include "NVPTXMCExpr.h"
     21 #include "NVPTXMachineFunctionInfo.h"
     22 #include "NVPTXRegisterInfo.h"
     23 #include "NVPTXTargetMachine.h"
     24 #include "NVPTXUtilities.h"
     25 #include "cl_common_defines.h"
     26 #include "llvm/ADT/StringExtras.h"
     27 #include "llvm/Analysis/ConstantFolding.h"
     28 #include "llvm/CodeGen/Analysis.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineLoopInfo.h"
     31 #include "llvm/CodeGen/MachineModuleInfo.h"
     32 #include "llvm/CodeGen/MachineRegisterInfo.h"
     33 #include "llvm/IR/DebugInfo.h"
     34 #include "llvm/IR/DerivedTypes.h"
     35 #include "llvm/IR/Function.h"
     36 #include "llvm/IR/GlobalVariable.h"
     37 #include "llvm/IR/Mangler.h"
     38 #include "llvm/IR/Module.h"
     39 #include "llvm/IR/Operator.h"
     40 #include "llvm/MC/MCInst.h"
     41 #include "llvm/MC/MCStreamer.h"
     42 #include "llvm/MC/MCSymbol.h"
     43 #include "llvm/Support/CommandLine.h"
     44 #include "llvm/Support/ErrorHandling.h"
     45 #include "llvm/Support/FormattedStream.h"
     46 #include "llvm/Support/Path.h"
     47 #include "llvm/Support/TargetRegistry.h"
     48 #include "llvm/Support/TimeValue.h"
     49 #include "llvm/Target/TargetLoweringObjectFile.h"
     50 #include "llvm/Transforms/Utils/UnrollLoop.h"
     51 #include <sstream>
     52 using namespace llvm;
     53 
     54 #define DEPOTNAME "__local_depot"
     55 
/// Hidden command-line flag "nvptx-emit-line-numbers"; defaults to true.
/// emitLineNumberAsDotLoc() returns immediately when this flag is false.
static cl::opt<bool>
EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                cl::init(true));
     60 
/// Hidden command-line flag "nvptx-emit-src"; defaults to false. When set,
/// emitLineNumberAsDotLoc() calls emitSrcInText() for the current file and
/// line before it emits the .loc directive.
static cl::opt<bool>
InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
              cl::desc("NVPTX Specific: Emit source line in ptx file"),
              cl::init(false));
     65 
     66 namespace {
     67 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
     68 /// depends.
     69 void DiscoverDependentGlobals(const Value *V,
     70                               DenseSet<const GlobalVariable *> &Globals) {
     71   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     72     Globals.insert(GV);
     73   else {
     74     if (const User *U = dyn_cast<User>(V)) {
     75       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
     76         DiscoverDependentGlobals(U->getOperand(i), Globals);
     77       }
     78     }
     79   }
     80 }
     81 
     82 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
     83 /// instances to be emitted, but only after any dependents have been added
     84 /// first.
     85 void VisitGlobalVariableForEmission(
     86     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
     87     DenseSet<const GlobalVariable *> &Visited,
     88     DenseSet<const GlobalVariable *> &Visiting) {
     89   // Have we already visited this one?
     90   if (Visited.count(GV))
     91     return;
     92 
     93   // Do we have a circular dependency?
     94   if (!Visiting.insert(GV).second)
     95     report_fatal_error("Circular dependency found in global variable set");
     96 
     97   // Make sure we visit all dependents first
     98   DenseSet<const GlobalVariable *> Others;
     99   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    100     DiscoverDependentGlobals(GV->getOperand(i), Others);
    101 
    102   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
    103                                                   E = Others.end();
    104        I != E; ++I)
    105     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
    106 
    107   // Now we can visit ourself
    108   Order.push_back(GV);
    109   Visited.insert(GV);
    110   Visiting.erase(GV);
    111 }
    112 }
    113 
    114 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
    115   if (!EmitLineNumbers)
    116     return;
    117   if (ignoreLoc(MI))
    118     return;
    119 
    120   const DebugLoc &curLoc = MI.getDebugLoc();
    121 
    122   if (!prevDebugLoc && !curLoc)
    123     return;
    124 
    125   if (prevDebugLoc == curLoc)
    126     return;
    127 
    128   prevDebugLoc = curLoc;
    129 
    130   if (!curLoc)
    131     return;
    132 
    133   auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
    134   if (!Scope)
    135      return;
    136 
    137   StringRef fileName(Scope->getFilename());
    138   StringRef dirName(Scope->getDirectory());
    139   SmallString<128> FullPathName = dirName;
    140   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    141     sys::path::append(FullPathName, fileName);
    142     fileName = FullPathName;
    143   }
    144 
    145   if (filenameMap.find(fileName) == filenameMap.end())
    146     return;
    147 
    148   // Emit the line from the source file.
    149   if (InterleaveSrc)
    150     this->emitSrcInText(fileName, curLoc.getLine());
    151 
    152   std::stringstream temp;
    153   temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
    154        << " " << curLoc.getCol();
    155   OutStreamer->EmitRawText(temp.str());
    156 }
    157 
    158 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    159   SmallString<128> Str;
    160   raw_svector_ostream OS(Str);
    161   if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
    162     emitLineNumberAsDotLoc(*MI);
    163 
    164   MCInst Inst;
    165   lowerToMCInst(MI, Inst);
    166   EmitToStreamer(*OutStreamer, Inst);
    167 }
    168 
    169 // Handle symbol backtracking for targets that do not support image handles
    170 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
    171                                            unsigned OpNo, MCOperand &MCOp) {
    172   const MachineOperand &MO = MI->getOperand(OpNo);
    173   const MCInstrDesc &MCID = MI->getDesc();
    174 
    175   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
    176     // This is a texture fetch, so operand 4 is a texref and operand 5 is
    177     // a samplerref
    178     if (OpNo == 4 && MO.isImm()) {
    179       lowerImageHandleSymbol(MO.getImm(), MCOp);
    180       return true;
    181     }
    182     if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
    183       lowerImageHandleSymbol(MO.getImm(), MCOp);
    184       return true;
    185     }
    186 
    187     return false;
    188   } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
    189     unsigned VecSize =
    190       1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
    191 
    192     // For a surface load of vector size N, the Nth operand will be the surfref
    193     if (OpNo == VecSize && MO.isImm()) {
    194       lowerImageHandleSymbol(MO.getImm(), MCOp);
    195       return true;
    196     }
    197 
    198     return false;
    199   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
    200     // This is a surface store, so operand 0 is a surfref
    201     if (OpNo == 0 && MO.isImm()) {
    202       lowerImageHandleSymbol(MO.getImm(), MCOp);
    203       return true;
    204     }
    205 
    206     return false;
    207   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
    208     // This is a query, so operand 1 is a surfref/texref
    209     if (OpNo == 1 && MO.isImm()) {
    210       lowerImageHandleSymbol(MO.getImm(), MCOp);
    211       return true;
    212     }
    213 
    214     return false;
    215   }
    216 
    217   return false;
    218 }
    219 
    220 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
    221   // Ewwww
    222   TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
    223   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
    224   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
    225   const char *Sym = MFI->getImageHandleSymbol(Index);
    226   std::string *SymNamePtr =
    227     nvTM.getManagedStrPool()->getManagedString(Sym);
    228   MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
    229     StringRef(SymNamePtr->c_str())));
    230 }
    231 
    232 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
    233   OutMI.setOpcode(MI->getOpcode());
    234   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
    235   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
    236     const MachineOperand &MO = MI->getOperand(0);
    237     OutMI.addOperand(GetSymbolRef(
    238       OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
    239     return;
    240   }
    241 
    242   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    243     const MachineOperand &MO = MI->getOperand(i);
    244 
    245     MCOperand MCOp;
    246     if (!nvptxSubtarget->hasImageHandles()) {
    247       if (lowerImageHandleOperand(MI, i, MCOp)) {
    248         OutMI.addOperand(MCOp);
    249         continue;
    250       }
    251     }
    252 
    253     if (lowerOperand(MO, MCOp))
    254       OutMI.addOperand(MCOp);
    255   }
    256 }
    257 
    258 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
    259                                    MCOperand &MCOp) {
    260   switch (MO.getType()) {
    261   default: llvm_unreachable("unknown operand type");
    262   case MachineOperand::MO_Register:
    263     MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
    264     break;
    265   case MachineOperand::MO_Immediate:
    266     MCOp = MCOperand::createImm(MO.getImm());
    267     break;
    268   case MachineOperand::MO_MachineBasicBlock:
    269     MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
    270         MO.getMBB()->getSymbol(), OutContext));
    271     break;
    272   case MachineOperand::MO_ExternalSymbol:
    273     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
    274     break;
    275   case MachineOperand::MO_GlobalAddress:
    276     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
    277     break;
    278   case MachineOperand::MO_FPImmediate: {
    279     const ConstantFP *Cnt = MO.getFPImm();
    280     const APFloat &Val = Cnt->getValueAPF();
    281 
    282     switch (Cnt->getType()->getTypeID()) {
    283     default: report_fatal_error("Unsupported FP type"); break;
    284     case Type::FloatTyID:
    285       MCOp = MCOperand::createExpr(
    286         NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
    287       break;
    288     case Type::DoubleTyID:
    289       MCOp = MCOperand::createExpr(
    290         NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
    291       break;
    292     }
    293     break;
    294   }
    295   }
    296   return true;
    297 }
    298 
    299 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
    300   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    301     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    302 
    303     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
    304     unsigned RegNum = RegMap[Reg];
    305 
    306     // Encode the register class in the upper 4 bits
    307     // Must be kept in sync with NVPTXInstPrinter::printRegName
    308     unsigned Ret = 0;
    309     if (RC == &NVPTX::Int1RegsRegClass) {
    310       Ret = (1 << 28);
    311     } else if (RC == &NVPTX::Int16RegsRegClass) {
    312       Ret = (2 << 28);
    313     } else if (RC == &NVPTX::Int32RegsRegClass) {
    314       Ret = (3 << 28);
    315     } else if (RC == &NVPTX::Int64RegsRegClass) {
    316       Ret = (4 << 28);
    317     } else if (RC == &NVPTX::Float32RegsRegClass) {
    318       Ret = (5 << 28);
    319     } else if (RC == &NVPTX::Float64RegsRegClass) {
    320       Ret = (6 << 28);
    321     } else {
    322       report_fatal_error("Bad register class");
    323     }
    324 
    325     // Insert the vreg number
    326     Ret |= (RegNum & 0x0FFFFFFF);
    327     return Ret;
    328   } else {
    329     // Some special-use registers are actually physical registers.
    330     // Encode this as the register class ID of 0 and the real register ID.
    331     return Reg & 0x0FFFFFFF;
    332   }
    333 }
    334 
    335 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
    336   const MCExpr *Expr;
    337   Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
    338                                  OutContext);
    339   return MCOperand::createExpr(Expr);
    340 }
    341 
    342 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
    343   const DataLayout &DL = getDataLayout();
    344   const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
    345 
    346   Type *Ty = F->getReturnType();
    347 
    348   bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
    349 
    350   if (Ty->getTypeID() == Type::VoidTyID)
    351     return;
    352 
    353   O << " (";
    354 
    355   if (isABI) {
    356     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
    357       unsigned size = 0;
    358       if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
    359         size = ITy->getBitWidth();
    360         if (size < 32)
    361           size = 32;
    362       } else {
    363         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
    364         size = Ty->getPrimitiveSizeInBits();
    365       }
    366 
    367       O << ".param .b" << size << " func_retval0";
    368     } else if (isa<PointerType>(Ty)) {
    369       O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
    370         << " func_retval0";
    371     } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
    372       unsigned totalsz = DL.getTypeAllocSize(Ty);
    373        unsigned retAlignment = 0;
    374        if (!llvm::getAlign(*F, 0, retAlignment))
    375          retAlignment = DL.getABITypeAlignment(Ty);
    376        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
    377          << "]";
    378     } else
    379       llvm_unreachable("Unknown return type");
    380   } else {
    381     SmallVector<EVT, 16> vtparts;
    382     ComputeValueVTs(*TLI, DL, Ty, vtparts);
    383     unsigned idx = 0;
    384     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
    385       unsigned elems = 1;
    386       EVT elemtype = vtparts[i];
    387       if (vtparts[i].isVector()) {
    388         elems = vtparts[i].getVectorNumElements();
    389         elemtype = vtparts[i].getVectorElementType();
    390       }
    391 
    392       for (unsigned j = 0, je = elems; j != je; ++j) {
    393         unsigned sz = elemtype.getSizeInBits();
    394         if (elemtype.isInteger() && (sz < 32))
    395           sz = 32;
    396         O << ".reg .b" << sz << " func_retval" << idx;
    397         if (j < je - 1)
    398           O << ", ";
    399         ++idx;
    400       }
    401       if (i < e - 1)
    402         O << ", ";
    403     }
    404   }
    405   O << ") ";
    406   return;
    407 }
    408 
    409 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
    410                                         raw_ostream &O) {
    411   const Function *F = MF.getFunction();
    412   printReturnValStr(F, O);
    413 }
    414 
    415 // Return true if MBB is the header of a loop marked with
    416 // llvm.loop.unroll.disable.
    417 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
    418 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
    419     const MachineBasicBlock &MBB) const {
    420   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
    421   // We insert .pragma "nounroll" only to the loop header.
    422   if (!LI.isLoopHeader(&MBB))
    423     return false;
    424 
    425   // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
    426   // we iterate through each back edge of the loop with header MBB, and check
    427   // whether its metadata contains llvm.loop.unroll.disable.
    428   for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
    429     const MachineBasicBlock *PMBB = *I;
    430     if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
    431       // Edges from other loops to MBB are not back edges.
    432       continue;
    433     }
    434     if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
    435       if (MDNode *LoopID =
    436               PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
    437         if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
    438           return true;
    439       }
    440     }
    441   }
    442   return false;
    443 }
    444 
    445 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
    446   AsmPrinter::EmitBasicBlockStart(MBB);
    447   if (isLoopHeaderOfNoUnroll(MBB))
    448     OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
    449 }
    450 
    451 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
    452   SmallString<128> Str;
    453   raw_svector_ostream O(Str);
    454 
    455   if (!GlobalsEmitted) {
    456     emitGlobals(*MF->getFunction()->getParent());
    457     GlobalsEmitted = true;
    458   }
    459 
    460   // Set up
    461   MRI = &MF->getRegInfo();
    462   F = MF->getFunction();
    463   emitLinkageDirective(F, O);
    464   if (llvm::isKernelFunction(*F))
    465     O << ".entry ";
    466   else {
    467     O << ".func ";
    468     printReturnValStr(*MF, O);
    469   }
    470 
    471   CurrentFnSym->print(O, MAI);
    472 
    473   emitFunctionParamList(*MF, O);
    474 
    475   if (llvm::isKernelFunction(*F))
    476     emitKernelFunctionDirectives(*F, O);
    477 
    478   OutStreamer->EmitRawText(O.str());
    479 
    480   prevDebugLoc = DebugLoc();
    481 }
    482 
    483 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
    484   VRegMapping.clear();
    485   OutStreamer->EmitRawText(StringRef("{\n"));
    486   setAndEmitFunctionVirtualRegisters(*MF);
    487 
    488   SmallString<128> Str;
    489   raw_svector_ostream O(Str);
    490   emitDemotedVars(MF->getFunction(), O);
    491   OutStreamer->EmitRawText(O.str());
    492 }
    493 
    494 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
    495   OutStreamer->EmitRawText(StringRef("}\n"));
    496   VRegMapping.clear();
    497 }
    498 
    499 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
    500   unsigned RegNo = MI->getOperand(0).getReg();
    501   if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
    502     OutStreamer->AddComment(Twine("implicit-def: ") +
    503                             getVirtualRegisterName(RegNo));
    504   } else {
    505     OutStreamer->AddComment(Twine("implicit-def: ") +
    506                             nvptxSubtarget->getRegisterInfo()->getName(RegNo));
    507   }
    508   OutStreamer->AddBlankLine();
    509 }
    510 
    511 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
    512                                                    raw_ostream &O) const {
    513   // If the NVVM IR has some of reqntid* specified, then output
    514   // the reqntid directive, and set the unspecified ones to 1.
    515   // If none of reqntid* is specified, don't output reqntid directive.
    516   unsigned reqntidx, reqntidy, reqntidz;
    517   bool specified = false;
    518   if (!llvm::getReqNTIDx(F, reqntidx))
    519     reqntidx = 1;
    520   else
    521     specified = true;
    522   if (!llvm::getReqNTIDy(F, reqntidy))
    523     reqntidy = 1;
    524   else
    525     specified = true;
    526   if (!llvm::getReqNTIDz(F, reqntidz))
    527     reqntidz = 1;
    528   else
    529     specified = true;
    530 
    531   if (specified)
    532     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
    533       << "\n";
    534 
    535   // If the NVVM IR has some of maxntid* specified, then output
    536   // the maxntid directive, and set the unspecified ones to 1.
    537   // If none of maxntid* is specified, don't output maxntid directive.
    538   unsigned maxntidx, maxntidy, maxntidz;
    539   specified = false;
    540   if (!llvm::getMaxNTIDx(F, maxntidx))
    541     maxntidx = 1;
    542   else
    543     specified = true;
    544   if (!llvm::getMaxNTIDy(F, maxntidy))
    545     maxntidy = 1;
    546   else
    547     specified = true;
    548   if (!llvm::getMaxNTIDz(F, maxntidz))
    549     maxntidz = 1;
    550   else
    551     specified = true;
    552 
    553   if (specified)
    554     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
    555       << "\n";
    556 
    557   unsigned mincta;
    558   if (llvm::getMinCTASm(F, mincta))
    559     O << ".minnctapersm " << mincta << "\n";
    560 }
    561 
    562 std::string
    563 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
    564   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    565 
    566   std::string Name;
    567   raw_string_ostream NameStr(Name);
    568 
    569   VRegRCMap::const_iterator I = VRegMapping.find(RC);
    570   assert(I != VRegMapping.end() && "Bad register class");
    571   const DenseMap<unsigned, unsigned> &RegMap = I->second;
    572 
    573   VRegMap::const_iterator VI = RegMap.find(Reg);
    574   assert(VI != RegMap.end() && "Bad virtual register");
    575   unsigned MappedVR = VI->second;
    576 
    577   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
    578 
    579   NameStr.flush();
    580   return Name;
    581 }
    582 
    583 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
    584                                           raw_ostream &O) {
    585   O << getVirtualRegisterName(vr);
    586 }
    587 
    588 void NVPTXAsmPrinter::printVecModifiedImmediate(
    589     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
    590   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
    591   int Imm = (int) MO.getImm();
    592   if (0 == strcmp(Modifier, "vecelem"))
    593     O << "_" << vecelem[Imm];
    594   else if (0 == strcmp(Modifier, "vecv4comm1")) {
    595     if ((Imm < 0) || (Imm > 3))
    596       O << "//";
    597   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
    598     if ((Imm < 4) || (Imm > 7))
    599       O << "//";
    600   } else if (0 == strcmp(Modifier, "vecv4pos")) {
    601     if (Imm < 0)
    602       Imm = 0;
    603     O << "_" << vecelem[Imm % 4];
    604   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
    605     if ((Imm < 0) || (Imm > 1))
    606       O << "//";
    607   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
    608     if ((Imm < 2) || (Imm > 3))
    609       O << "//";
    610   } else if (0 == strcmp(Modifier, "vecv2pos")) {
    611     if (Imm < 0)
    612       Imm = 0;
    613     O << "_" << vecelem[Imm % 2];
    614   } else
    615     llvm_unreachable("Unknown Modifier on immediate operand");
    616 }
    617 
    618 
    619 
    620 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
    621 
    622   emitLinkageDirective(F, O);
    623   if (llvm::isKernelFunction(*F))
    624     O << ".entry ";
    625   else
    626     O << ".func ";
    627   printReturnValStr(F, O);
    628   getSymbol(F)->print(O, MAI);
    629   O << "\n";
    630   emitFunctionParamList(F, O);
    631   O << ";\n";
    632 }
    633 
    634 static bool usedInGlobalVarDef(const Constant *C) {
    635   if (!C)
    636     return false;
    637 
    638   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    639     return GV->getName() != "llvm.used";
    640   }
    641 
    642   for (const User *U : C->users())
    643     if (const Constant *C = dyn_cast<Constant>(U))
    644       if (usedInGlobalVarDef(C))
    645         return true;
    646 
    647   return false;
    648 }
    649 
    650 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
    651   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
    652     if (othergv->getName() == "llvm.used")
    653       return true;
    654   }
    655 
    656   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
    657     if (instr->getParent() && instr->getParent()->getParent()) {
    658       const Function *curFunc = instr->getParent()->getParent();
    659       if (oneFunc && (curFunc != oneFunc))
    660         return false;
    661       oneFunc = curFunc;
    662       return true;
    663     } else
    664       return false;
    665   }
    666 
    667   for (const User *UU : U->users())
    668     if (!usedInOneFunc(UU, oneFunc))
    669       return false;
    670 
    671   return true;
    672 }
    673 
    674 /* Find out if a global variable can be demoted to local scope.
    675  * Currently, this is valid for CUDA shared variables, which have local
    676  * scope and global lifetime. So the conditions to check are :
    677  * 1. Is the global variable in shared address space?
    678  * 2. Does it have internal linkage?
    679  * 3. Is the global variable referenced only in one function?
    680  */
    681 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
    682   if (!gv->hasInternalLinkage())
    683     return false;
    684   PointerType *Pty = gv->getType();
    685   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
    686     return false;
    687 
    688   const Function *oneFunc = nullptr;
    689 
    690   bool flag = usedInOneFunc(gv, oneFunc);
    691   if (!flag)
    692     return false;
    693   if (!oneFunc)
    694     return false;
    695   f = oneFunc;
    696   return true;
    697 }
    698 
    699 static bool useFuncSeen(const Constant *C,
    700                         llvm::DenseMap<const Function *, bool> &seenMap) {
    701   for (const User *U : C->users()) {
    702     if (const Constant *cu = dyn_cast<Constant>(U)) {
    703       if (useFuncSeen(cu, seenMap))
    704         return true;
    705     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
    706       const BasicBlock *bb = I->getParent();
    707       if (!bb)
    708         continue;
    709       const Function *caller = bb->getParent();
    710       if (!caller)
    711         continue;
    712       if (seenMap.find(caller) != seenMap.end())
    713         return true;
    714     }
    715   }
    716   return false;
    717 }
    718 
    719 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
    720   llvm::DenseMap<const Function *, bool> seenMap;
    721   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
    722     const Function *F = &*FI;
    723 
    724     if (F->isDeclaration()) {
    725       if (F->use_empty())
    726         continue;
    727       if (F->getIntrinsicID())
    728         continue;
    729       emitDeclaration(F, O);
    730       continue;
    731     }
    732     for (const User *U : F->users()) {
    733       if (const Constant *C = dyn_cast<Constant>(U)) {
    734         if (usedInGlobalVarDef(C)) {
    735           // The use is in the initialization of a global variable
    736           // that is a function pointer, so print a declaration
    737           // for the original function
    738           emitDeclaration(F, O);
    739           break;
    740         }
    741         // Emit a declaration of this function if the function that
    742         // uses this constant expr has already been seen.
    743         if (useFuncSeen(C, seenMap)) {
    744           emitDeclaration(F, O);
    745           break;
    746         }
    747       }
    748 
    749       if (!isa<Instruction>(U))
    750         continue;
    751       const Instruction *instr = cast<Instruction>(U);
    752       const BasicBlock *bb = instr->getParent();
    753       if (!bb)
    754         continue;
    755       const Function *caller = bb->getParent();
    756       if (!caller)
    757         continue;
    758 
    759       // If a caller has already been seen, then the caller is
    760       // appearing in the module before the callee. so print out
    761       // a declaration for the callee.
    762       if (seenMap.find(caller) != seenMap.end()) {
    763         emitDeclaration(F, O);
    764         break;
    765       }
    766     }
    767     seenMap[F] = true;
    768   }
    769 }
    770 
    771 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
    772   DebugInfoFinder DbgFinder;
    773   DbgFinder.processModule(M);
    774 
    775   unsigned i = 1;
    776   for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
    777     StringRef Filename = DIUnit->getFilename();
    778     StringRef Dirname = DIUnit->getDirectory();
    779     SmallString<128> FullPathName = Dirname;
    780     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    781       sys::path::append(FullPathName, Filename);
    782       Filename = FullPathName;
    783     }
    784     if (filenameMap.find(Filename) != filenameMap.end())
    785       continue;
    786     filenameMap[Filename] = i;
    787     OutStreamer->EmitDwarfFileDirective(i, "", Filename);
    788     ++i;
    789   }
    790 
    791   for (DISubprogram *SP : DbgFinder.subprograms()) {
    792     StringRef Filename = SP->getFilename();
    793     StringRef Dirname = SP->getDirectory();
    794     SmallString<128> FullPathName = Dirname;
    795     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    796       sys::path::append(FullPathName, Filename);
    797       Filename = FullPathName;
    798     }
    799     if (filenameMap.find(Filename) != filenameMap.end())
    800       continue;
    801     filenameMap[Filename] = i;
    802     OutStreamer->EmitDwarfFileDirective(i, "", Filename);
    803     ++i;
    804   }
    805 }
    806 
    807 static bool isEmptyXXStructor(GlobalVariable *GV) {
    808   if (!GV) return true;
    809   const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
    810   if (!InitList) return true;  // Not an array; we don't know how to parse.
    811   return InitList->getNumOperands() == 0;
    812 }
    813 
    814 bool NVPTXAsmPrinter::doInitialization(Module &M) {
    815   // Construct a default subtarget off of the TargetMachine defaults. The
    816   // rest of NVPTX isn't friendly to change subtargets per function and
    817   // so the default TargetMachine will have all of the options.
    818   const Triple &TT = TM.getTargetTriple();
    819   StringRef CPU = TM.getTargetCPU();
    820   StringRef FS = TM.getTargetFeatureString();
    821   const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
    822   const NVPTXSubtarget STI(TT, CPU, FS, NTM);
    823 
    824   if (M.alias_size()) {
    825     report_fatal_error("Module has aliases, which NVPTX does not support.");
    826     return true; // error
    827   }
    828   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
    829     report_fatal_error(
    830         "Module has a nontrivial global ctor, which NVPTX does not support.");
    831     return true;  // error
    832   }
    833   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
    834     report_fatal_error(
    835         "Module has a nontrivial global dtor, which NVPTX does not support.");
    836     return true;  // error
    837   }
    838 
    839   SmallString<128> Str1;
    840   raw_svector_ostream OS1(Str1);
    841 
    842   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
    843 
    844   // We need to call the parent's one explicitly.
    845   //bool Result = AsmPrinter::doInitialization(M);
    846 
    847   // Initialize TargetLoweringObjectFile.
    848   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
    849       .Initialize(OutContext, TM);
    850 
    851   Mang = new Mangler();
    852 
    853   // Emit header before any dwarf directives are emitted below.
    854   emitHeader(M, OS1, STI);
    855   OutStreamer->EmitRawText(OS1.str());
    856 
    857   // Already commented out
    858   //bool Result = AsmPrinter::doInitialization(M);
    859 
    860   // Emit module-level inline asm if it exists.
    861   if (!M.getModuleInlineAsm().empty()) {
    862     OutStreamer->AddComment("Start of file scope inline assembly");
    863     OutStreamer->AddBlankLine();
    864     OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
    865     OutStreamer->AddBlankLine();
    866     OutStreamer->AddComment("End of file scope inline assembly");
    867     OutStreamer->AddBlankLine();
    868   }
    869 
    870   // If we're not NVCL we're CUDA, go ahead and emit filenames.
    871   if (TM.getTargetTriple().getOS() != Triple::NVCL)
    872     recordAndEmitFilenames(M);
    873 
    874   GlobalsEmitted = false;
    875 
    876   return false; // success
    877 }
    878 
    879 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
    880   SmallString<128> Str2;
    881   raw_svector_ostream OS2(Str2);
    882 
    883   emitDeclarations(M, OS2);
    884 
    885   // As ptxas does not support forward references of globals, we need to first
    886   // sort the list of module-level globals in def-use order. We visit each
    887   // global variable in order, and ensure that we emit it *after* its dependent
    888   // globals. We use a little extra memory maintaining both a set and a list to
    889   // have fast searches while maintaining a strict ordering.
    890   SmallVector<const GlobalVariable *, 8> Globals;
    891   DenseSet<const GlobalVariable *> GVVisited;
    892   DenseSet<const GlobalVariable *> GVVisiting;
    893 
    894   // Visit each global variable, in order
    895   for (const GlobalVariable &I : M.globals())
    896     VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
    897 
    898   assert(GVVisited.size() == M.getGlobalList().size() &&
    899          "Missed a global variable");
    900   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
    901 
    902   // Print out module-level global variables in proper order
    903   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
    904     printModuleLevelGV(Globals[i], OS2);
    905 
    906   OS2 << '\n';
    907 
    908   OutStreamer->EmitRawText(OS2.str());
    909 }
    910 
    911 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
    912                                  const NVPTXSubtarget &STI) {
    913   O << "//\n";
    914   O << "// Generated by LLVM NVPTX Back-End\n";
    915   O << "//\n";
    916   O << "\n";
    917 
    918   unsigned PTXVersion = STI.getPTXVersion();
    919   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
    920 
    921   O << ".target ";
    922   O << STI.getTargetName();
    923 
    924   const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
    925   if (NTM.getDrvInterface() == NVPTX::NVCL)
    926     O << ", texmode_independent";
    927   else {
    928     if (!STI.hasDouble())
    929       O << ", map_f64_to_f32";
    930   }
    931 
    932   if (MAI->doesSupportDebugInformation())
    933     O << ", debug";
    934 
    935   O << "\n";
    936 
    937   O << ".address_size ";
    938   if (NTM.is64Bit())
    939     O << "64";
    940   else
    941     O << "32";
    942   O << "\n";
    943 
    944   O << "\n";
    945 }
    946 
    947 bool NVPTXAsmPrinter::doFinalization(Module &M) {
    948   // If we did not emit any functions, then the global declarations have not
    949   // yet been emitted.
    950   if (!GlobalsEmitted) {
    951     emitGlobals(M);
    952     GlobalsEmitted = true;
    953   }
    954 
    955   // XXX Temproarily remove global variables so that doFinalization() will not
    956   // emit them again (global variables are emitted at beginning).
    957 
    958   Module::GlobalListType &global_list = M.getGlobalList();
    959   int i, n = global_list.size();
    960   GlobalVariable **gv_array = new GlobalVariable *[n];
    961 
    962   // first, back-up GlobalVariable in gv_array
    963   i = 0;
    964   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
    965        I != E; ++I)
    966     gv_array[i++] = &*I;
    967 
    968   // second, empty global_list
    969   while (!global_list.empty())
    970     global_list.remove(global_list.begin());
    971 
    972   // call doFinalization
    973   bool ret = AsmPrinter::doFinalization(M);
    974 
    975   // now we restore global variables
    976   for (i = 0; i < n; i++)
    977     global_list.insert(global_list.end(), gv_array[i]);
    978 
    979   clearAnnotationCache(&M);
    980 
    981   delete[] gv_array;
    982   return ret;
    983 
    984   //bool Result = AsmPrinter::doFinalization(M);
    985   // Instead of calling the parents doFinalization, we may
    986   // clone parents doFinalization and customize here.
    987   // Currently, we if NVISA out the EmitGlobals() in
    988   // parent's doFinalization, which is too intrusive.
    989   //
    990   // Same for the doInitialization.
    991   //return Result;
    992 }
    993 
    994 // This function emits appropriate linkage directives for
    995 // functions and global variables.
    996 //
    997 // extern function declaration            -> .extern
    998 // extern function definition             -> .visible
    999 // external global variable with init     -> .visible
   1000 // external without init                  -> .extern
   1001 // appending                              -> not allowed, assert.
   1002 // for any linkage other than
   1003 // internal, private, linker_private,
   1004 // linker_private_weak, linker_private_weak_def_auto,
   1005 // we emit                                -> .weak.
   1006 
   1007 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
   1008                                            raw_ostream &O) {
   1009   if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
   1010     if (V->hasExternalLinkage()) {
   1011       if (isa<GlobalVariable>(V)) {
   1012         const GlobalVariable *GVar = cast<GlobalVariable>(V);
   1013         if (GVar) {
   1014           if (GVar->hasInitializer())
   1015             O << ".visible ";
   1016           else
   1017             O << ".extern ";
   1018         }
   1019       } else if (V->isDeclaration())
   1020         O << ".extern ";
   1021       else
   1022         O << ".visible ";
   1023     } else if (V->hasAppendingLinkage()) {
   1024       std::string msg;
   1025       msg.append("Error: ");
   1026       msg.append("Symbol ");
   1027       if (V->hasName())
   1028         msg.append(V->getName());
   1029       msg.append("has unsupported appending linkage type");
   1030       llvm_unreachable(msg.c_str());
   1031     } else if (!V->hasInternalLinkage() &&
   1032                !V->hasPrivateLinkage()) {
   1033       O << ".weak ";
   1034     }
   1035   }
   1036 }
   1037 
   1038 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   1039                                          raw_ostream &O,
   1040                                          bool processDemoted) {
   1041 
   1042   // Skip meta data
   1043   if (GVar->hasSection()) {
   1044     if (GVar->getSection() == "llvm.metadata")
   1045       return;
   1046   }
   1047 
   1048   // Skip LLVM intrinsic global variables
   1049   if (GVar->getName().startswith("llvm.") ||
   1050       GVar->getName().startswith("nvvm."))
   1051     return;
   1052 
   1053   const DataLayout &DL = getDataLayout();
   1054 
   1055   // GlobalVariables are always constant pointers themselves.
   1056   PointerType *PTy = GVar->getType();
   1057   Type *ETy = GVar->getValueType();
   1058 
   1059   if (GVar->hasExternalLinkage()) {
   1060     if (GVar->hasInitializer())
   1061       O << ".visible ";
   1062     else
   1063       O << ".extern ";
   1064   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
   1065              GVar->hasAvailableExternallyLinkage() ||
   1066              GVar->hasCommonLinkage()) {
   1067     O << ".weak ";
   1068   }
   1069 
   1070   if (llvm::isTexture(*GVar)) {
   1071     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
   1072     return;
   1073   }
   1074 
   1075   if (llvm::isSurface(*GVar)) {
   1076     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
   1077     return;
   1078   }
   1079 
   1080   if (GVar->isDeclaration()) {
   1081     // (extern) declarations, no definition or initializer
   1082     // Currently the only known declaration is for an automatic __local
   1083     // (.shared) promoted to global.
   1084     emitPTXGlobalVariable(GVar, O);
   1085     O << ";\n";
   1086     return;
   1087   }
   1088 
   1089   if (llvm::isSampler(*GVar)) {
   1090     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
   1091 
   1092     const Constant *Initializer = nullptr;
   1093     if (GVar->hasInitializer())
   1094       Initializer = GVar->getInitializer();
   1095     const ConstantInt *CI = nullptr;
   1096     if (Initializer)
   1097       CI = dyn_cast<ConstantInt>(Initializer);
   1098     if (CI) {
   1099       unsigned sample = CI->getZExtValue();
   1100 
   1101       O << " = { ";
   1102 
   1103       for (int i = 0,
   1104                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
   1105            i < 3; i++) {
   1106         O << "addr_mode_" << i << " = ";
   1107         switch (addr) {
   1108         case 0:
   1109           O << "wrap";
   1110           break;
   1111         case 1:
   1112           O << "clamp_to_border";
   1113           break;
   1114         case 2:
   1115           O << "clamp_to_edge";
   1116           break;
   1117         case 3:
   1118           O << "wrap";
   1119           break;
   1120         case 4:
   1121           O << "mirror";
   1122           break;
   1123         }
   1124         O << ", ";
   1125       }
   1126       O << "filter_mode = ";
   1127       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
   1128       case 0:
   1129         O << "nearest";
   1130         break;
   1131       case 1:
   1132         O << "linear";
   1133         break;
   1134       case 2:
   1135         llvm_unreachable("Anisotropic filtering is not supported");
   1136       default:
   1137         O << "nearest";
   1138         break;
   1139       }
   1140       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
   1141         O << ", force_unnormalized_coords = 1";
   1142       }
   1143       O << " }";
   1144     }
   1145 
   1146     O << ";\n";
   1147     return;
   1148   }
   1149 
   1150   if (GVar->hasPrivateLinkage()) {
   1151 
   1152     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
   1153       return;
   1154 
   1155     // FIXME - need better way (e.g. Metadata) to avoid generating this global
   1156     if (!strncmp(GVar->getName().data(), "filename", 8))
   1157       return;
   1158     if (GVar->use_empty())
   1159       return;
   1160   }
   1161 
   1162   const Function *demotedFunc = nullptr;
   1163   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
   1164     O << "// " << GVar->getName() << " has been demoted\n";
   1165     if (localDecls.find(demotedFunc) != localDecls.end())
   1166       localDecls[demotedFunc].push_back(GVar);
   1167     else {
   1168       std::vector<const GlobalVariable *> temp;
   1169       temp.push_back(GVar);
   1170       localDecls[demotedFunc] = temp;
   1171     }
   1172     return;
   1173   }
   1174 
   1175   O << ".";
   1176   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1177 
   1178   if (isManaged(*GVar)) {
   1179     O << " .attribute(.managed)";
   1180   }
   1181 
   1182   if (GVar->getAlignment() == 0)
   1183     O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
   1184   else
   1185     O << " .align " << GVar->getAlignment();
   1186 
   1187   if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
   1188     O << " .";
   1189     // Special case: ABI requires that we use .u8 for predicates
   1190     if (ETy->isIntegerTy(1))
   1191       O << "u8";
   1192     else
   1193       O << getPTXFundamentalTypeStr(ETy, false);
   1194     O << " ";
   1195     getSymbol(GVar)->print(O, MAI);
   1196 
   1197     // Ptx allows variable initilization only for constant and global state
   1198     // spaces.
   1199     if (GVar->hasInitializer()) {
   1200       if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1201           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
   1202         const Constant *Initializer = GVar->getInitializer();
   1203         // 'undef' is treated as there is no value specified.
   1204         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
   1205           O << " = ";
   1206           printScalarConstant(Initializer, O);
   1207         }
   1208       } else {
   1209         // The frontend adds zero-initializer to device and constant variables
   1210         // that don't have an initial value, and UndefValue to shared
   1211         // variables, so skip warning for this case.
   1212         if (!GVar->getInitializer()->isNullValue() &&
   1213             !isa<UndefValue>(GVar->getInitializer())) {
   1214           report_fatal_error("initial value of '" + GVar->getName() +
   1215                              "' is not allowed in addrspace(" +
   1216                              Twine(PTy->getAddressSpace()) + ")");
   1217         }
   1218       }
   1219     }
   1220   } else {
   1221     unsigned int ElementSize = 0;
   1222 
   1223     // Although PTX has direct support for struct type and array type and
   1224     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
   1225     // targets that support these high level field accesses. Structs, arrays
   1226     // and vectors are lowered into arrays of bytes.
   1227     switch (ETy->getTypeID()) {
   1228     case Type::StructTyID:
   1229     case Type::ArrayTyID:
   1230     case Type::VectorTyID:
   1231       ElementSize = DL.getTypeStoreSize(ETy);
   1232       // Ptx allows variable initilization only for constant and
   1233       // global state spaces.
   1234       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1235            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
   1236           GVar->hasInitializer()) {
   1237         const Constant *Initializer = GVar->getInitializer();
   1238         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
   1239           AggBuffer aggBuffer(ElementSize, O, *this);
   1240           bufferAggregateConstant(Initializer, &aggBuffer);
   1241           if (aggBuffer.numSymbols) {
   1242             if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
   1243               O << " .u64 ";
   1244               getSymbol(GVar)->print(O, MAI);
   1245               O << "[";
   1246               O << ElementSize / 8;
   1247             } else {
   1248               O << " .u32 ";
   1249               getSymbol(GVar)->print(O, MAI);
   1250               O << "[";
   1251               O << ElementSize / 4;
   1252             }
   1253             O << "]";
   1254           } else {
   1255             O << " .b8 ";
   1256             getSymbol(GVar)->print(O, MAI);
   1257             O << "[";
   1258             O << ElementSize;
   1259             O << "]";
   1260           }
   1261           O << " = {";
   1262           aggBuffer.print();
   1263           O << "}";
   1264         } else {
   1265           O << " .b8 ";
   1266           getSymbol(GVar)->print(O, MAI);
   1267           if (ElementSize) {
   1268             O << "[";
   1269             O << ElementSize;
   1270             O << "]";
   1271           }
   1272         }
   1273       } else {
   1274         O << " .b8 ";
   1275         getSymbol(GVar)->print(O, MAI);
   1276         if (ElementSize) {
   1277           O << "[";
   1278           O << ElementSize;
   1279           O << "]";
   1280         }
   1281       }
   1282       break;
   1283     default:
   1284       llvm_unreachable("type not supported yet");
   1285     }
   1286 
   1287   }
   1288   O << ";\n";
   1289 }
   1290 
   1291 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
   1292   if (localDecls.find(f) == localDecls.end())
   1293     return;
   1294 
   1295   std::vector<const GlobalVariable *> &gvars = localDecls[f];
   1296 
   1297   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
   1298     O << "\t// demoted variable\n\t";
   1299     printModuleLevelGV(gvars[i], O, true);
   1300   }
   1301 }
   1302 
   1303 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   1304                                           raw_ostream &O) const {
   1305   switch (AddressSpace) {
   1306   case llvm::ADDRESS_SPACE_LOCAL:
   1307     O << "local";
   1308     break;
   1309   case llvm::ADDRESS_SPACE_GLOBAL:
   1310     O << "global";
   1311     break;
   1312   case llvm::ADDRESS_SPACE_CONST:
   1313     O << "const";
   1314     break;
   1315   case llvm::ADDRESS_SPACE_SHARED:
   1316     O << "shared";
   1317     break;
   1318   default:
   1319     report_fatal_error("Bad address space found while emitting PTX");
   1320     break;
   1321   }
   1322 }
   1323 
   1324 std::string
   1325 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
   1326   switch (Ty->getTypeID()) {
   1327   default:
   1328     llvm_unreachable("unexpected type");
   1329     break;
   1330   case Type::IntegerTyID: {
   1331     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
   1332     if (NumBits == 1)
   1333       return "pred";
   1334     else if (NumBits <= 64) {
   1335       std::string name = "u";
   1336       return name + utostr(NumBits);
   1337     } else {
   1338       llvm_unreachable("Integer too large");
   1339       break;
   1340     }
   1341     break;
   1342   }
   1343   case Type::FloatTyID:
   1344     return "f32";
   1345   case Type::DoubleTyID:
   1346     return "f64";
   1347   case Type::PointerTyID:
   1348     if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
   1349       if (useB4PTR)
   1350         return "b64";
   1351       else
   1352         return "u64";
   1353     else if (useB4PTR)
   1354       return "b32";
   1355     else
   1356       return "u32";
   1357   }
   1358   llvm_unreachable("unexpected type");
   1359   return nullptr;
   1360 }
   1361 
   1362 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
   1363                                             raw_ostream &O) {
   1364 
   1365   const DataLayout &DL = getDataLayout();
   1366 
   1367   // GlobalVariables are always constant pointers themselves.
   1368   Type *ETy = GVar->getValueType();
   1369 
   1370   O << ".";
   1371   emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
   1372   if (GVar->getAlignment() == 0)
   1373     O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
   1374   else
   1375     O << " .align " << GVar->getAlignment();
   1376 
   1377   if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
   1378     O << " .";
   1379     O << getPTXFundamentalTypeStr(ETy);
   1380     O << " ";
   1381     getSymbol(GVar)->print(O, MAI);
   1382     return;
   1383   }
   1384 
   1385   int64_t ElementSize = 0;
   1386 
   1387   // Although PTX has direct support for struct type and array type and LLVM IR
   1388   // is very similar to PTX, the LLVM CodeGen does not support for targets that
   1389   // support these high level field accesses. Structs and arrays are lowered
   1390   // into arrays of bytes.
   1391   switch (ETy->getTypeID()) {
   1392   case Type::StructTyID:
   1393   case Type::ArrayTyID:
   1394   case Type::VectorTyID:
   1395     ElementSize = DL.getTypeStoreSize(ETy);
   1396     O << " .b8 ";
   1397     getSymbol(GVar)->print(O, MAI);
   1398     O << "[";
   1399     if (ElementSize) {
   1400       O << ElementSize;
   1401     }
   1402     O << "]";
   1403     break;
   1404   default:
   1405     llvm_unreachable("type not supported yet");
   1406   }
   1407   return;
   1408 }
   1409 
   1410 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
   1411   if (Ty->isSingleValueType())
   1412     return DL.getPrefTypeAlignment(Ty);
   1413 
   1414   auto *ATy = dyn_cast<ArrayType>(Ty);
   1415   if (ATy)
   1416     return getOpenCLAlignment(DL, ATy->getElementType());
   1417 
   1418   auto *STy = dyn_cast<StructType>(Ty);
   1419   if (STy) {
   1420     unsigned int alignStruct = 1;
   1421     // Go through each element of the struct and find the
   1422     // largest alignment.
   1423     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
   1424       Type *ETy = STy->getElementType(i);
   1425       unsigned int align = getOpenCLAlignment(DL, ETy);
   1426       if (align > alignStruct)
   1427         alignStruct = align;
   1428     }
   1429     return alignStruct;
   1430   }
   1431 
   1432   auto *FTy = dyn_cast<FunctionType>(Ty);
   1433   if (FTy)
   1434     return DL.getPointerPrefAlignment();
   1435   return DL.getPrefTypeAlignment(Ty);
   1436 }
   1437 
   1438 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
   1439                                      int paramIndex, raw_ostream &O) {
   1440   getSymbol(I->getParent())->print(O, MAI);
   1441   O << "_param_" << paramIndex;
   1442 }
   1443 
   1444 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
   1445   const DataLayout &DL = getDataLayout();
   1446   const AttributeSet &PAL = F->getAttributes();
   1447   const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
   1448   Function::const_arg_iterator I, E;
   1449   unsigned paramIndex = 0;
   1450   bool first = true;
   1451   bool isKernelFunc = llvm::isKernelFunction(*F);
   1452   bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
   1453   MVT thePointerTy = TLI->getPointerTy(DL);
   1454 
   1455   if (F->arg_empty()) {
   1456     O << "()\n";
   1457     return;
   1458   }
   1459 
   1460   O << "(\n";
   1461 
   1462   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
   1463     Type *Ty = I->getType();
   1464 
   1465     if (!first)
   1466       O << ",\n";
   1467 
   1468     first = false;
   1469 
   1470     // Handle image/sampler parameters
   1471     if (isKernelFunction(*F)) {
   1472       if (isSampler(*I) || isImage(*I)) {
   1473         if (isImage(*I)) {
   1474           std::string sname = I->getName();
   1475           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
   1476             if (nvptxSubtarget->hasImageHandles())
   1477               O << "\t.param .u64 .ptr .surfref ";
   1478             else
   1479               O << "\t.param .surfref ";
   1480             CurrentFnSym->print(O, MAI);
   1481             O << "_param_" << paramIndex;
   1482           }
   1483           else { // Default image is read_only
   1484             if (nvptxSubtarget->hasImageHandles())
   1485               O << "\t.param .u64 .ptr .texref ";
   1486             else
   1487               O << "\t.param .texref ";
   1488             CurrentFnSym->print(O, MAI);
   1489             O << "_param_" << paramIndex;
   1490           }
   1491         } else {
   1492           if (nvptxSubtarget->hasImageHandles())
   1493             O << "\t.param .u64 .ptr .samplerref ";
   1494           else
   1495             O << "\t.param .samplerref ";
   1496           CurrentFnSym->print(O, MAI);
   1497           O << "_param_" << paramIndex;
   1498         }
   1499         continue;
   1500       }
   1501     }
   1502 
   1503     if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
   1504       if (Ty->isAggregateType() || Ty->isVectorTy()) {
   1505         // Just print .param .align <a> .b8 .param[size];
   1506         // <a> = PAL.getparamalignment
   1507         // size = typeallocsize of element type
   1508         unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1509         if (align == 0)
   1510           align = DL.getABITypeAlignment(Ty);
   1511 
   1512         unsigned sz = DL.getTypeAllocSize(Ty);
   1513         O << "\t.param .align " << align << " .b8 ";
   1514         printParamName(I, paramIndex, O);
   1515         O << "[" << sz << "]";
   1516 
   1517         continue;
   1518       }
   1519       // Just a scalar
   1520       auto *PTy = dyn_cast<PointerType>(Ty);
   1521       if (isKernelFunc) {
   1522         if (PTy) {
   1523           // Special handling for pointer arguments to kernel
   1524           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
   1525 
   1526           if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
   1527               NVPTX::CUDA) {
   1528             Type *ETy = PTy->getElementType();
   1529             int addrSpace = PTy->getAddressSpace();
   1530             switch (addrSpace) {
   1531             default:
   1532               O << ".ptr ";
   1533               break;
   1534             case llvm::ADDRESS_SPACE_CONST:
   1535               O << ".ptr .const ";
   1536               break;
   1537             case llvm::ADDRESS_SPACE_SHARED:
   1538               O << ".ptr .shared ";
   1539               break;
   1540             case llvm::ADDRESS_SPACE_GLOBAL:
   1541               O << ".ptr .global ";
   1542               break;
   1543             }
   1544             O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
   1545           }
   1546           printParamName(I, paramIndex, O);
   1547           continue;
   1548         }
   1549 
   1550         // non-pointer scalar to kernel func
   1551         O << "\t.param .";
   1552         // Special case: predicate operands become .u8 types
   1553         if (Ty->isIntegerTy(1))
   1554           O << "u8";
   1555         else
   1556           O << getPTXFundamentalTypeStr(Ty);
   1557         O << " ";
   1558         printParamName(I, paramIndex, O);
   1559         continue;
   1560       }
   1561       // Non-kernel function, just print .param .b<size> for ABI
   1562       // and .reg .b<size> for non-ABI
   1563       unsigned sz = 0;
   1564       if (isa<IntegerType>(Ty)) {
   1565         sz = cast<IntegerType>(Ty)->getBitWidth();
   1566         if (sz < 32)
   1567           sz = 32;
   1568       } else if (isa<PointerType>(Ty))
   1569         sz = thePointerTy.getSizeInBits();
   1570       else
   1571         sz = Ty->getPrimitiveSizeInBits();
   1572       if (isABI)
   1573         O << "\t.param .b" << sz << " ";
   1574       else
   1575         O << "\t.reg .b" << sz << " ";
   1576       printParamName(I, paramIndex, O);
   1577       continue;
   1578     }
   1579 
   1580     // param has byVal attribute. So should be a pointer
   1581     auto *PTy = dyn_cast<PointerType>(Ty);
   1582     assert(PTy && "Param with byval attribute should be a pointer type");
   1583     Type *ETy = PTy->getElementType();
   1584 
   1585     if (isABI || isKernelFunc) {
   1586       // Just print .param .align <a> .b8 .param[size];
   1587       // <a> = PAL.getparamalignment
   1588       // size = typeallocsize of element type
   1589       unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1590       if (align == 0)
   1591         align = DL.getABITypeAlignment(ETy);
   1592 
   1593       unsigned sz = DL.getTypeAllocSize(ETy);
   1594       O << "\t.param .align " << align << " .b8 ";
   1595       printParamName(I, paramIndex, O);
   1596       O << "[" << sz << "]";
   1597       continue;
   1598     } else {
   1599       // Split the ETy into constituent parts and
   1600       // print .param .b<size> <name> for each part.
   1601       // Further, if a part is vector, print the above for
   1602       // each vector element.
   1603       SmallVector<EVT, 16> vtparts;
   1604       ComputeValueVTs(*TLI, DL, ETy, vtparts);
   1605       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
   1606         unsigned elems = 1;
   1607         EVT elemtype = vtparts[i];
   1608         if (vtparts[i].isVector()) {
   1609           elems = vtparts[i].getVectorNumElements();
   1610           elemtype = vtparts[i].getVectorElementType();
   1611         }
   1612 
   1613         for (unsigned j = 0, je = elems; j != je; ++j) {
   1614           unsigned sz = elemtype.getSizeInBits();
   1615           if (elemtype.isInteger() && (sz < 32))
   1616             sz = 32;
   1617           O << "\t.reg .b" << sz << " ";
   1618           printParamName(I, paramIndex, O);
   1619           if (j < je - 1)
   1620             O << ",\n";
   1621           ++paramIndex;
   1622         }
   1623         if (i < e - 1)
   1624           O << ",\n";
   1625       }
   1626       --paramIndex;
   1627       continue;
   1628     }
   1629   }
   1630 
   1631   O << "\n)\n";
   1632 }
   1633 
   1634 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   1635                                             raw_ostream &O) {
   1636   const Function *F = MF.getFunction();
   1637   emitFunctionParamList(F, O);
   1638 }
   1639 
   1640 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
   1641     const MachineFunction &MF) {
   1642   SmallString<128> Str;
   1643   raw_svector_ostream O(Str);
   1644 
   1645   // Map the global virtual register number to a register class specific
   1646   // virtual register number starting from 1 with that class.
   1647   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   1648   //unsigned numRegClasses = TRI->getNumRegClasses();
   1649 
   1650   // Emit the Fake Stack Object
   1651   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1652   int NumBytes = (int) MFI->getStackSize();
   1653   if (NumBytes) {
   1654     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
   1655       << getFunctionNumber() << "[" << NumBytes << "];\n";
   1656     if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
   1657       O << "\t.reg .b64 \t%SP;\n";
   1658       O << "\t.reg .b64 \t%SPL;\n";
   1659     } else {
   1660       O << "\t.reg .b32 \t%SP;\n";
   1661       O << "\t.reg .b32 \t%SPL;\n";
   1662     }
   1663   }
   1664 
   1665   // Go through all virtual registers to establish the mapping between the
   1666   // global virtual
   1667   // register number and the per class virtual register number.
   1668   // We use the per class virtual register number in the ptx output.
   1669   unsigned int numVRs = MRI->getNumVirtRegs();
   1670   for (unsigned i = 0; i < numVRs; i++) {
   1671     unsigned int vr = TRI->index2VirtReg(i);
   1672     const TargetRegisterClass *RC = MRI->getRegClass(vr);
   1673     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   1674     int n = regmap.size();
   1675     regmap.insert(std::make_pair(vr, n + 1));
   1676   }
   1677 
   1678   // Emit register declarations
   1679   // @TODO: Extract out the real register usage
   1680   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
   1681   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
   1682   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
   1683   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
   1684   // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
   1685   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
   1686   // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
   1687 
   1688   // Emit declaration of the virtual registers or 'physical' registers for
   1689   // each register class
   1690   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
   1691     const TargetRegisterClass *RC = TRI->getRegClass(i);
   1692     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   1693     std::string rcname = getNVPTXRegClassName(RC);
   1694     std::string rcStr = getNVPTXRegClassStr(RC);
   1695     int n = regmap.size();
   1696 
   1697     // Only declare those registers that may be used.
   1698     if (n) {
   1699        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
   1700          << ">;\n";
   1701     }
   1702   }
   1703 
   1704   OutStreamer->EmitRawText(O.str());
   1705 }
   1706 
   1707 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
   1708   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
   1709   bool ignored;
   1710   unsigned int numHex;
   1711   const char *lead;
   1712 
   1713   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
   1714     numHex = 8;
   1715     lead = "0f";
   1716     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
   1717   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
   1718     numHex = 16;
   1719     lead = "0d";
   1720     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
   1721   } else
   1722     llvm_unreachable("unsupported fp type");
   1723 
   1724   APInt API = APF.bitcastToAPInt();
   1725   std::string hexstr(utohexstr(API.getZExtValue()));
   1726   O << lead;
   1727   if (hexstr.length() < numHex)
   1728     O << std::string(numHex - hexstr.length(), '0');
   1729   O << utohexstr(API.getZExtValue());
   1730 }
   1731 
   1732 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
   1733   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
   1734     O << CI->getValue();
   1735     return;
   1736   }
   1737   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
   1738     printFPConstant(CFP, O);
   1739     return;
   1740   }
   1741   if (isa<ConstantPointerNull>(CPV)) {
   1742     O << "0";
   1743     return;
   1744   }
   1745   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1746     bool IsNonGenericPointer = false;
   1747     if (GVar->getType()->getAddressSpace() != 0) {
   1748       IsNonGenericPointer = true;
   1749     }
   1750     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
   1751       O << "generic(";
   1752       getSymbol(GVar)->print(O, MAI);
   1753       O << ")";
   1754     } else {
   1755       getSymbol(GVar)->print(O, MAI);
   1756     }
   1757     return;
   1758   }
   1759   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1760     const Value *v = Cexpr->stripPointerCasts();
   1761     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
   1762     bool IsNonGenericPointer = false;
   1763     if (PTy && PTy->getAddressSpace() != 0) {
   1764       IsNonGenericPointer = true;
   1765     }
   1766     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
   1767       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
   1768         O << "generic(";
   1769         getSymbol(GVar)->print(O, MAI);
   1770         O << ")";
   1771       } else {
   1772         getSymbol(GVar)->print(O, MAI);
   1773       }
   1774       return;
   1775     } else {
   1776       lowerConstant(CPV)->print(O, MAI);
   1777       return;
   1778     }
   1779   }
   1780   llvm_unreachable("Not scalar type found in printScalarConstant()");
   1781 }
   1782 
   1783 // These utility functions assure we get the right sequence of bytes for a given
   1784 // type even for big-endian machines
   1785 template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
   1786   int64_t vp = (int64_t)val;
   1787   for (unsigned i = 0; i < sizeof(T); ++i) {
   1788     p[i] = (unsigned char)vp;
   1789     vp >>= 8;
   1790   }
   1791 }
   1792 static void ConvertFloatToBytes(unsigned char *p, float val) {
   1793   int32_t *vp = (int32_t *)&val;
   1794   for (unsigned i = 0; i < sizeof(int32_t); ++i) {
   1795     p[i] = (unsigned char)*vp;
   1796     *vp >>= 8;
   1797   }
   1798 }
   1799 static void ConvertDoubleToBytes(unsigned char *p, double val) {
   1800   int64_t *vp = (int64_t *)&val;
   1801   for (unsigned i = 0; i < sizeof(int64_t); ++i) {
   1802     p[i] = (unsigned char)*vp;
   1803     *vp >>= 8;
   1804   }
   1805 }
   1806 
   1807 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   1808                                    AggBuffer *aggBuffer) {
   1809 
   1810   const DataLayout &DL = getDataLayout();
   1811 
   1812   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
   1813     int s = DL.getTypeAllocSize(CPV->getType());
   1814     if (s < Bytes)
   1815       s = Bytes;
   1816     aggBuffer->addZeros(s);
   1817     return;
   1818   }
   1819 
   1820   unsigned char ptr[8];
   1821   switch (CPV->getType()->getTypeID()) {
   1822 
   1823   case Type::IntegerTyID: {
   1824     Type *ETy = CPV->getType();
   1825     if (ETy == Type::getInt8Ty(CPV->getContext())) {
   1826       unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
   1827       ConvertIntToBytes<>(ptr, c);
   1828       aggBuffer->addBytes(ptr, 1, Bytes);
   1829     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
   1830       short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
   1831       ConvertIntToBytes<>(ptr, int16);
   1832       aggBuffer->addBytes(ptr, 2, Bytes);
   1833     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
   1834       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1835         int int32 = (int)(constInt->getZExtValue());
   1836         ConvertIntToBytes<>(ptr, int32);
   1837         aggBuffer->addBytes(ptr, 4, Bytes);
   1838         break;
   1839       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1840         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   1841                 ConstantFoldConstantExpression(Cexpr, DL))) {
   1842           int int32 = (int)(constInt->getZExtValue());
   1843           ConvertIntToBytes<>(ptr, int32);
   1844           aggBuffer->addBytes(ptr, 4, Bytes);
   1845           break;
   1846         }
   1847         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1848           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1849           aggBuffer->addSymbol(v, Cexpr->getOperand(0));
   1850           aggBuffer->addZeros(4);
   1851           break;
   1852         }
   1853       }
   1854       llvm_unreachable("unsupported integer const type");
   1855     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
   1856       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1857         long long int64 = (long long)(constInt->getZExtValue());
   1858         ConvertIntToBytes<>(ptr, int64);
   1859         aggBuffer->addBytes(ptr, 8, Bytes);
   1860         break;
   1861       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1862         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   1863                 ConstantFoldConstantExpression(Cexpr, DL))) {
   1864           long long int64 = (long long)(constInt->getZExtValue());
   1865           ConvertIntToBytes<>(ptr, int64);
   1866           aggBuffer->addBytes(ptr, 8, Bytes);
   1867           break;
   1868         }
   1869         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1870           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1871           aggBuffer->addSymbol(v, Cexpr->getOperand(0));
   1872           aggBuffer->addZeros(8);
   1873           break;
   1874         }
   1875       }
   1876       llvm_unreachable("unsupported integer const type");
   1877     } else
   1878       llvm_unreachable("unsupported integer const type");
   1879     break;
   1880   }
   1881   case Type::FloatTyID:
   1882   case Type::DoubleTyID: {
   1883     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
   1884     Type *Ty = CFP->getType();
   1885     if (Ty == Type::getFloatTy(CPV->getContext())) {
   1886       float float32 = (float) CFP->getValueAPF().convertToFloat();
   1887       ConvertFloatToBytes(ptr, float32);
   1888       aggBuffer->addBytes(ptr, 4, Bytes);
   1889     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
   1890       double float64 = CFP->getValueAPF().convertToDouble();
   1891       ConvertDoubleToBytes(ptr, float64);
   1892       aggBuffer->addBytes(ptr, 8, Bytes);
   1893     } else {
   1894       llvm_unreachable("unsupported fp const type");
   1895     }
   1896     break;
   1897   }
   1898   case Type::PointerTyID: {
   1899     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1900       aggBuffer->addSymbol(GVar, GVar);
   1901     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1902       const Value *v = Cexpr->stripPointerCasts();
   1903       aggBuffer->addSymbol(v, Cexpr);
   1904     }
   1905     unsigned int s = DL.getTypeAllocSize(CPV->getType());
   1906     aggBuffer->addZeros(s);
   1907     break;
   1908   }
   1909 
   1910   case Type::ArrayTyID:
   1911   case Type::VectorTyID:
   1912   case Type::StructTyID: {
   1913     if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
   1914       int ElementSize = DL.getTypeAllocSize(CPV->getType());
   1915       bufferAggregateConstant(CPV, aggBuffer);
   1916       if (Bytes > ElementSize)
   1917         aggBuffer->addZeros(Bytes - ElementSize);
   1918     } else if (isa<ConstantAggregateZero>(CPV))
   1919       aggBuffer->addZeros(Bytes);
   1920     else
   1921       llvm_unreachable("Unexpected Constant type");
   1922     break;
   1923   }
   1924 
   1925   default:
   1926     llvm_unreachable("unsupported type");
   1927   }
   1928 }
   1929 
   1930 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
   1931                                               AggBuffer *aggBuffer) {
   1932   const DataLayout &DL = getDataLayout();
   1933   int Bytes;
   1934 
   1935   // Old constants
   1936   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
   1937     if (CPV->getNumOperands())
   1938       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
   1939         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
   1940     return;
   1941   }
   1942 
   1943   if (const ConstantDataSequential *CDS =
   1944           dyn_cast<ConstantDataSequential>(CPV)) {
   1945     if (CDS->getNumElements())
   1946       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
   1947         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
   1948                      aggBuffer);
   1949     return;
   1950   }
   1951 
   1952   if (isa<ConstantStruct>(CPV)) {
   1953     if (CPV->getNumOperands()) {
   1954       StructType *ST = cast<StructType>(CPV->getType());
   1955       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
   1956         if (i == (e - 1))
   1957           Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
   1958                   DL.getTypeAllocSize(ST) -
   1959                   DL.getStructLayout(ST)->getElementOffset(i);
   1960         else
   1961           Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
   1962                   DL.getStructLayout(ST)->getElementOffset(i);
   1963         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
   1964       }
   1965     }
   1966     return;
   1967   }
   1968   llvm_unreachable("unsupported constant type in printAggregateConstant()");
   1969 }
   1970 
   1971 // buildTypeNameMap - Run through symbol table looking for type names.
   1972 //
   1973 
   1974 
   1975 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   1976   switch (MI.getOpcode()) {
   1977   default:
   1978     return false;
   1979   case NVPTX::CallArgBeginInst:
   1980   case NVPTX::CallArgEndInst0:
   1981   case NVPTX::CallArgEndInst1:
   1982   case NVPTX::CallArgF32:
   1983   case NVPTX::CallArgF64:
   1984   case NVPTX::CallArgI16:
   1985   case NVPTX::CallArgI32:
   1986   case NVPTX::CallArgI32imm:
   1987   case NVPTX::CallArgI64:
   1988   case NVPTX::CallArgParam:
   1989   case NVPTX::CallVoidInst:
   1990   case NVPTX::CallVoidInstReg:
   1991   case NVPTX::Callseq_End:
   1992   case NVPTX::CallVoidInstReg64:
   1993   case NVPTX::DeclareParamInst:
   1994   case NVPTX::DeclareRetMemInst:
   1995   case NVPTX::DeclareRetRegInst:
   1996   case NVPTX::DeclareRetScalarInst:
   1997   case NVPTX::DeclareScalarParamInst:
   1998   case NVPTX::DeclareScalarRegInst:
   1999   case NVPTX::StoreParamF32:
   2000   case NVPTX::StoreParamF64:
   2001   case NVPTX::StoreParamI16:
   2002   case NVPTX::StoreParamI32:
   2003   case NVPTX::StoreParamI64:
   2004   case NVPTX::StoreParamI8:
   2005   case NVPTX::StoreRetvalF32:
   2006   case NVPTX::StoreRetvalF64:
   2007   case NVPTX::StoreRetvalI16:
   2008   case NVPTX::StoreRetvalI32:
   2009   case NVPTX::StoreRetvalI64:
   2010   case NVPTX::StoreRetvalI8:
   2011   case NVPTX::LastCallArgF32:
   2012   case NVPTX::LastCallArgF64:
   2013   case NVPTX::LastCallArgI16:
   2014   case NVPTX::LastCallArgI32:
   2015   case NVPTX::LastCallArgI32imm:
   2016   case NVPTX::LastCallArgI64:
   2017   case NVPTX::LastCallArgParam:
   2018   case NVPTX::LoadParamMemF32:
   2019   case NVPTX::LoadParamMemF64:
   2020   case NVPTX::LoadParamMemI16:
   2021   case NVPTX::LoadParamMemI32:
   2022   case NVPTX::LoadParamMemI64:
   2023   case NVPTX::LoadParamMemI8:
   2024   case NVPTX::PrototypeInst:
   2025   case NVPTX::DBG_VALUE:
   2026     return true;
   2027   }
   2028   return false;
   2029 }
   2030 
   2031 /// lowerConstantForGV - Return an MCExpr for the given Constant.  This is mostly
   2032 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
   2033 /// expressions that are representable in PTX and create
   2034 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
   2035 const MCExpr *
   2036 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
   2037   MCContext &Ctx = OutContext;
   2038 
   2039   if (CV->isNullValue() || isa<UndefValue>(CV))
   2040     return MCConstantExpr::create(0, Ctx);
   2041 
   2042   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
   2043     return MCConstantExpr::create(CI->getZExtValue(), Ctx);
   2044 
   2045   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
   2046     const MCSymbolRefExpr *Expr =
   2047       MCSymbolRefExpr::create(getSymbol(GV), Ctx);
   2048     if (ProcessingGeneric) {
   2049       return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
   2050     } else {
   2051       return Expr;
   2052     }
   2053   }
   2054 
   2055   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
   2056   if (!CE) {
   2057     llvm_unreachable("Unknown constant value to lower!");
   2058   }
   2059 
   2060   switch (CE->getOpcode()) {
   2061   default:
   2062     // If the code isn't optimized, there may be outstanding folding
   2063     // opportunities. Attempt to fold the expression using DataLayout as a
   2064     // last resort before giving up.
   2065     if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
   2066       if (C != CE)
   2067         return lowerConstantForGV(C, ProcessingGeneric);
   2068 
   2069     // Otherwise report the problem to the user.
   2070     {
   2071       std::string S;
   2072       raw_string_ostream OS(S);
   2073       OS << "Unsupported expression in static initializer: ";
   2074       CE->printAsOperand(OS, /*PrintType=*/false,
   2075                      !MF ? nullptr : MF->getFunction()->getParent());
   2076       report_fatal_error(OS.str());
   2077     }
   2078 
   2079   case Instruction::AddrSpaceCast: {
   2080     // Strip the addrspacecast and pass along the operand
   2081     PointerType *DstTy = cast<PointerType>(CE->getType());
   2082     if (DstTy->getAddressSpace() == 0) {
   2083       return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
   2084     }
   2085     std::string S;
   2086     raw_string_ostream OS(S);
   2087     OS << "Unsupported expression in static initializer: ";
   2088     CE->printAsOperand(OS, /*PrintType=*/ false,
   2089                        !MF ? 0 : MF->getFunction()->getParent());
   2090     report_fatal_error(OS.str());
   2091   }
   2092 
   2093   case Instruction::GetElementPtr: {
   2094     const DataLayout &DL = getDataLayout();
   2095 
   2096     // Generate a symbolic expression for the byte address
   2097     APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
   2098     cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
   2099 
   2100     const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
   2101                                             ProcessingGeneric);
   2102     if (!OffsetAI)
   2103       return Base;
   2104 
   2105     int64_t Offset = OffsetAI.getSExtValue();
   2106     return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
   2107                                    Ctx);
   2108   }
   2109 
   2110   case Instruction::Trunc:
   2111     // We emit the value and depend on the assembler to truncate the generated
   2112     // expression properly.  This is important for differences between
   2113     // blockaddress labels.  Since the two labels are in the same function, it
   2114     // is reasonable to treat their delta as a 32-bit value.
   2115     // FALL THROUGH.
   2116   case Instruction::BitCast:
   2117     return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
   2118 
   2119   case Instruction::IntToPtr: {
   2120     const DataLayout &DL = getDataLayout();
   2121 
   2122     // Handle casts to pointers by changing them into casts to the appropriate
   2123     // integer type.  This promotes constant folding and simplifies this code.
   2124     Constant *Op = CE->getOperand(0);
   2125     Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
   2126                                       false/*ZExt*/);
   2127     return lowerConstantForGV(Op, ProcessingGeneric);
   2128   }
   2129 
   2130   case Instruction::PtrToInt: {
   2131     const DataLayout &DL = getDataLayout();
   2132 
   2133     // Support only foldable casts to/from pointers that can be eliminated by
   2134     // changing the pointer to the appropriately sized integer type.
   2135     Constant *Op = CE->getOperand(0);
   2136     Type *Ty = CE->getType();
   2137 
   2138     const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
   2139 
   2140     // We can emit the pointer value into this slot if the slot is an
   2141     // integer slot equal to the size of the pointer.
   2142     if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
   2143       return OpExpr;
   2144 
   2145     // Otherwise the pointer is smaller than the resultant integer, mask off
   2146     // the high bits so we are sure to get a proper truncation if the input is
   2147     // a constant expr.
   2148     unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
   2149     const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
   2150     return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
   2151   }
   2152 
   2153   // The MC library also has a right-shift operator, but it isn't consistently
   2154   // signed or unsigned between different targets.
   2155   case Instruction::Add: {
   2156     const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
   2157     const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
   2158     switch (CE->getOpcode()) {
   2159     default: llvm_unreachable("Unknown binary operator constant cast expr");
   2160     case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
   2161     }
   2162   }
   2163   }
   2164 }
   2165 
   2166 // Copy of MCExpr::print customized for NVPTX
   2167 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
   2168   switch (Expr.getKind()) {
   2169   case MCExpr::Target:
   2170     return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
   2171   case MCExpr::Constant:
   2172     OS << cast<MCConstantExpr>(Expr).getValue();
   2173     return;
   2174 
   2175   case MCExpr::SymbolRef: {
   2176     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
   2177     const MCSymbol &Sym = SRE.getSymbol();
   2178     Sym.print(OS, MAI);
   2179     return;
   2180   }
   2181 
   2182   case MCExpr::Unary: {
   2183     const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
   2184     switch (UE.getOpcode()) {
   2185     case MCUnaryExpr::LNot:  OS << '!'; break;
   2186     case MCUnaryExpr::Minus: OS << '-'; break;
   2187     case MCUnaryExpr::Not:   OS << '~'; break;
   2188     case MCUnaryExpr::Plus:  OS << '+'; break;
   2189     }
   2190     printMCExpr(*UE.getSubExpr(), OS);
   2191     return;
   2192   }
   2193 
   2194   case MCExpr::Binary: {
   2195     const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
   2196 
   2197     // Only print parens around the LHS if it is non-trivial.
   2198     if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
   2199         isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
   2200       printMCExpr(*BE.getLHS(), OS);
   2201     } else {
   2202       OS << '(';
   2203       printMCExpr(*BE.getLHS(), OS);
   2204       OS<< ')';
   2205     }
   2206 
   2207     switch (BE.getOpcode()) {
   2208     case MCBinaryExpr::Add:
   2209       // Print "X-42" instead of "X+-42".
   2210       if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
   2211         if (RHSC->getValue() < 0) {
   2212           OS << RHSC->getValue();
   2213           return;
   2214         }
   2215       }
   2216 
   2217       OS <<  '+';
   2218       break;
   2219     default: llvm_unreachable("Unhandled binary operator");
   2220     }
   2221 
   2222     // Only print parens around the LHS if it is non-trivial.
   2223     if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
   2224       printMCExpr(*BE.getRHS(), OS);
   2225     } else {
   2226       OS << '(';
   2227       printMCExpr(*BE.getRHS(), OS);
   2228       OS << ')';
   2229     }
   2230     return;
   2231   }
   2232   }
   2233 
   2234   llvm_unreachable("Invalid expression kind!");
   2235 }
   2236 
   2237 /// PrintAsmOperand - Print out an operand for an inline asm expression.
   2238 ///
   2239 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   2240                                       unsigned AsmVariant,
   2241                                       const char *ExtraCode, raw_ostream &O) {
   2242   if (ExtraCode && ExtraCode[0]) {
   2243     if (ExtraCode[1] != 0)
   2244       return true; // Unknown modifier.
   2245 
   2246     switch (ExtraCode[0]) {
   2247     default:
   2248       // See if this is a generic print operand
   2249       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
   2250     case 'r':
   2251       break;
   2252     }
   2253   }
   2254 
   2255   printOperand(MI, OpNo, O);
   2256 
   2257   return false;
   2258 }
   2259 
   2260 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
   2261     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
   2262     const char *ExtraCode, raw_ostream &O) {
   2263   if (ExtraCode && ExtraCode[0])
   2264     return true; // Unknown modifier
   2265 
   2266   O << '[';
   2267   printMemOperand(MI, OpNo, O);
   2268   O << ']';
   2269 
   2270   return false;
   2271 }
   2272 
   2273 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   2274                                    raw_ostream &O, const char *Modifier) {
   2275   const MachineOperand &MO = MI->getOperand(opNum);
   2276   switch (MO.getType()) {
   2277   case MachineOperand::MO_Register:
   2278     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
   2279       if (MO.getReg() == NVPTX::VRDepot)
   2280         O << DEPOTNAME << getFunctionNumber();
   2281       else
   2282         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
   2283     } else {
   2284       emitVirtualRegister(MO.getReg(), O);
   2285     }
   2286     return;
   2287 
   2288   case MachineOperand::MO_Immediate:
   2289     if (!Modifier)
   2290       O << MO.getImm();
   2291     else if (strstr(Modifier, "vec") == Modifier)
   2292       printVecModifiedImmediate(MO, Modifier, O);
   2293     else
   2294       llvm_unreachable(
   2295           "Don't know how to handle modifier on immediate operand");
   2296     return;
   2297 
   2298   case MachineOperand::MO_FPImmediate:
   2299     printFPConstant(MO.getFPImm(), O);
   2300     break;
   2301 
   2302   case MachineOperand::MO_GlobalAddress:
   2303     getSymbol(MO.getGlobal())->print(O, MAI);
   2304     break;
   2305 
   2306   case MachineOperand::MO_MachineBasicBlock:
   2307     MO.getMBB()->getSymbol()->print(O, MAI);
   2308     return;
   2309 
   2310   default:
   2311     llvm_unreachable("Operand type not supported.");
   2312   }
   2313 }
   2314 
   2315 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
   2316                                       raw_ostream &O, const char *Modifier) {
   2317   printOperand(MI, opNum, O);
   2318 
   2319   if (Modifier && !strcmp(Modifier, "add")) {
   2320     O << ", ";
   2321     printOperand(MI, opNum + 1, O);
   2322   } else {
   2323     if (MI->getOperand(opNum + 1).isImm() &&
   2324         MI->getOperand(opNum + 1).getImm() == 0)
   2325       return; // don't print ',0' or '+0'
   2326     O << "+";
   2327     printOperand(MI, opNum + 1, O);
   2328   }
   2329 }
   2330 
   2331 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   2332   std::stringstream temp;
   2333   LineReader *reader = this->getReader(filename);
   2334   temp << "\n//";
   2335   temp << filename.str();
   2336   temp << ":";
   2337   temp << line;
   2338   temp << " ";
   2339   temp << reader->readLine(line);
   2340   temp << "\n";
   2341   this->OutStreamer->EmitRawText(temp.str());
   2342 }
   2343 
   2344 LineReader *NVPTXAsmPrinter::getReader(const std::string &filename) {
   2345   if (!reader) {
   2346     reader = new LineReader(filename);
   2347   }
   2348 
   2349   if (reader->fileName() != filename) {
   2350     delete reader;
   2351     reader = new LineReader(filename);
   2352   }
   2353 
   2354   return reader;
   2355 }
   2356 
   2357 std::string LineReader::readLine(unsigned lineNum) {
   2358   if (lineNum < theCurLine) {
   2359     theCurLine = 0;
   2360     fstr.seekg(0, std::ios::beg);
   2361   }
   2362   while (theCurLine < lineNum) {
   2363     fstr.getline(buff, 500);
   2364     theCurLine++;
   2365   }
   2366   return buff;
   2367 }
   2368 
   2369 // Force static initialization.
   2370 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
   2371   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2372   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2373 }
   2374