Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a printer that converts from our internal representation
     11 // of machine-dependent LLVM code to NVPTX assembly language.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "NVPTXAsmPrinter.h"
     16 #include "InstPrinter/NVPTXInstPrinter.h"
     17 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
     18 #include "NVPTX.h"
     19 #include "NVPTXInstrInfo.h"
     20 #include "NVPTXMachineFunctionInfo.h"
     21 #include "NVPTXMCExpr.h"
     22 #include "NVPTXRegisterInfo.h"
     23 #include "NVPTXTargetMachine.h"
     24 #include "NVPTXUtilities.h"
     25 #include "cl_common_defines.h"
     26 #include "llvm/ADT/StringExtras.h"
     27 #include "llvm/Analysis/ConstantFolding.h"
     28 #include "llvm/CodeGen/Analysis.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineModuleInfo.h"
     31 #include "llvm/CodeGen/MachineRegisterInfo.h"
     32 #include "llvm/IR/DebugInfo.h"
     33 #include "llvm/IR/DerivedTypes.h"
     34 #include "llvm/IR/Function.h"
     35 #include "llvm/IR/GlobalVariable.h"
     36 #include "llvm/IR/Mangler.h"
     37 #include "llvm/IR/Module.h"
     38 #include "llvm/IR/Operator.h"
     39 #include "llvm/MC/MCStreamer.h"
     40 #include "llvm/MC/MCSymbol.h"
     41 #include "llvm/Support/CommandLine.h"
     42 #include "llvm/Support/ErrorHandling.h"
     43 #include "llvm/Support/FormattedStream.h"
     44 #include "llvm/Support/Path.h"
     45 #include "llvm/Support/TargetRegistry.h"
     46 #include "llvm/Support/TimeValue.h"
     47 #include "llvm/Target/TargetLoweringObjectFile.h"
     48 #include <sstream>
     49 using namespace llvm;
     50 
     51 #define DEPOTNAME "__local_depot"
     52 
     53 static cl::opt<bool>
     54 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
     55                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
     56                 cl::init(true));
     57 
     58 static cl::opt<bool>
     59 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
     60               cl::desc("NVPTX Specific: Emit source line in ptx file"),
     61               cl::init(false));
     62 
     63 namespace {
     64 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
     65 /// depends.
     66 void DiscoverDependentGlobals(const Value *V,
     67                               DenseSet<const GlobalVariable *> &Globals) {
     68   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     69     Globals.insert(GV);
     70   else {
     71     if (const User *U = dyn_cast<User>(V)) {
     72       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
     73         DiscoverDependentGlobals(U->getOperand(i), Globals);
     74       }
     75     }
     76   }
     77 }
     78 
     79 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
     80 /// instances to be emitted, but only after any dependents have been added
     81 /// first.
     82 void VisitGlobalVariableForEmission(
     83     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
     84     DenseSet<const GlobalVariable *> &Visited,
     85     DenseSet<const GlobalVariable *> &Visiting) {
     86   // Have we already visited this one?
     87   if (Visited.count(GV))
     88     return;
     89 
     90   // Do we have a circular dependency?
     91   if (Visiting.count(GV))
     92     report_fatal_error("Circular dependency found in global variable set");
     93 
     94   // Start visiting this global
     95   Visiting.insert(GV);
     96 
     97   // Make sure we visit all dependents first
     98   DenseSet<const GlobalVariable *> Others;
     99   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    100     DiscoverDependentGlobals(GV->getOperand(i), Others);
    101 
    102   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
    103                                                   E = Others.end();
    104        I != E; ++I)
    105     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
    106 
    107   // Now we can visit ourself
    108   Order.push_back(GV);
    109   Visited.insert(GV);
    110   Visiting.erase(GV);
    111 }
    112 }
    113 
    114 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
    115 // cannot just link to the existing version.
    116 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
    117 ///
    118 using namespace nvptx;
    119 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
    120   MCContext &Ctx = AP.OutContext;
    121 
    122   if (CV->isNullValue() || isa<UndefValue>(CV))
    123     return MCConstantExpr::Create(0, Ctx);
    124 
    125   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    126     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
    127 
    128   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
    129     return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
    130 
    131   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
    132     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
    133 
    134   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
    135   if (!CE)
    136     llvm_unreachable("Unknown constant value to lower!");
    137 
    138   switch (CE->getOpcode()) {
    139   default:
    140     // If the code isn't optimized, there may be outstanding folding
    141     // opportunities. Attempt to fold the expression using DataLayout as a
    142     // last resort before giving up.
    143     if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
    144       if (C != CE)
    145         return LowerConstant(C, AP);
    146 
    147     // Otherwise report the problem to the user.
    148     {
    149       std::string S;
    150       raw_string_ostream OS(S);
    151       OS << "Unsupported expression in static initializer: ";
    152       CE->printAsOperand(OS, /*PrintType=*/ false,
    153                          !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
    154       report_fatal_error(OS.str());
    155     }
    156   case Instruction::AddrSpaceCast: {
    157     // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
    158     // handled by the generic() logic in the MCExpr printer
    159     PointerType *DstTy            = cast<PointerType>(CE->getType());
    160     PointerType *SrcTy            = cast<PointerType>(CE->getOperand(0)->getType());
    161     if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
    162       return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
    163     }
    164     std::string S;
    165     raw_string_ostream OS(S);
    166     OS << "Unsupported expression in static initializer: ";
    167     CE->printAsOperand(OS, /*PrintType=*/ false,
    168                        !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
    169     report_fatal_error(OS.str());
    170   }
    171   case Instruction::GetElementPtr: {
    172     const DataLayout &TD = *AP.TM.getDataLayout();
    173     // Generate a symbolic expression for the byte address
    174     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    175     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
    176 
    177     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
    178     if (!OffsetAI)
    179       return Base;
    180 
    181     int64_t Offset = OffsetAI.getSExtValue();
    182     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
    183                                    Ctx);
    184   }
    185 
    186   case Instruction::Trunc:
    187     // We emit the value and depend on the assembler to truncate the generated
    188     // expression properly.  This is important for differences between
    189     // blockaddress labels.  Since the two labels are in the same function, it
    190     // is reasonable to treat their delta as a 32-bit value.
    191   // FALL THROUGH.
    192   case Instruction::BitCast:
    193     return LowerConstant(CE->getOperand(0), AP);
    194 
    195   case Instruction::IntToPtr: {
    196     const DataLayout &TD = *AP.TM.getDataLayout();
    197     // Handle casts to pointers by changing them into casts to the appropriate
    198     // integer type.  This promotes constant folding and simplifies this code.
    199     Constant *Op = CE->getOperand(0);
    200     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
    201                                       false /*ZExt*/);
    202     return LowerConstant(Op, AP);
    203   }
    204 
    205   case Instruction::PtrToInt: {
    206     const DataLayout &TD = *AP.TM.getDataLayout();
    207     // Support only foldable casts to/from pointers that can be eliminated by
    208     // changing the pointer to the appropriately sized integer type.
    209     Constant *Op = CE->getOperand(0);
    210     Type *Ty = CE->getType();
    211 
    212     const MCExpr *OpExpr = LowerConstant(Op, AP);
    213 
    214     // We can emit the pointer value into this slot if the slot is an
    215     // integer slot equal to the size of the pointer.
    216     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
    217       return OpExpr;
    218 
    219     // Otherwise the pointer is smaller than the resultant integer, mask off
    220     // the high bits so we are sure to get a proper truncation if the input is
    221     // a constant expr.
    222     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
    223     const MCExpr *MaskExpr =
    224         MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
    225     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
    226   }
    227 
    228     // The MC library also has a right-shift operator, but it isn't consistently
    229   // signed or unsigned between different targets.
    230   case Instruction::Add:
    231   case Instruction::Sub:
    232   case Instruction::Mul:
    233   case Instruction::SDiv:
    234   case Instruction::SRem:
    235   case Instruction::Shl:
    236   case Instruction::And:
    237   case Instruction::Or:
    238   case Instruction::Xor: {
    239     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
    240     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
    241     switch (CE->getOpcode()) {
    242     default:
    243       llvm_unreachable("Unknown binary operator constant cast expr");
    244     case Instruction::Add:
    245       return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
    246     case Instruction::Sub:
    247       return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
    248     case Instruction::Mul:
    249       return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
    250     case Instruction::SDiv:
    251       return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
    252     case Instruction::SRem:
    253       return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
    254     case Instruction::Shl:
    255       return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
    256     case Instruction::And:
    257       return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
    258     case Instruction::Or:
    259       return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
    260     case Instruction::Xor:
    261       return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
    262     }
    263   }
    264   }
    265 }
    266 
    267 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
    268   if (!EmitLineNumbers)
    269     return;
    270   if (ignoreLoc(MI))
    271     return;
    272 
    273   DebugLoc curLoc = MI.getDebugLoc();
    274 
    275   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
    276     return;
    277 
    278   if (prevDebugLoc == curLoc)
    279     return;
    280 
    281   prevDebugLoc = curLoc;
    282 
    283   if (curLoc.isUnknown())
    284     return;
    285 
    286   const MachineFunction *MF = MI.getParent()->getParent();
    287   //const TargetMachine &TM = MF->getTarget();
    288 
    289   const LLVMContext &ctx = MF->getFunction()->getContext();
    290   DIScope Scope(curLoc.getScope(ctx));
    291 
    292   assert((!Scope || Scope.isScope()) &&
    293     "Scope of a DebugLoc should be null or a DIScope.");
    294   if (!Scope)
    295      return;
    296 
    297   StringRef fileName(Scope.getFilename());
    298   StringRef dirName(Scope.getDirectory());
    299   SmallString<128> FullPathName = dirName;
    300   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    301     sys::path::append(FullPathName, fileName);
    302     fileName = FullPathName.str();
    303   }
    304 
    305   if (filenameMap.find(fileName.str()) == filenameMap.end())
    306     return;
    307 
    308   // Emit the line from the source file.
    309   if (InterleaveSrc)
    310     this->emitSrcInText(fileName.str(), curLoc.getLine());
    311 
    312   std::stringstream temp;
    313   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
    314        << " " << curLoc.getCol();
    315   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
    316 }
    317 
    318 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    319   SmallString<128> Str;
    320   raw_svector_ostream OS(Str);
    321   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    322     emitLineNumberAsDotLoc(*MI);
    323 
    324   MCInst Inst;
    325   lowerToMCInst(MI, Inst);
    326   EmitToStreamer(OutStreamer, Inst);
    327 }
    328 
    329 // Handle symbol backtracking for targets that do not support image handles
    330 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
    331                                            unsigned OpNo, MCOperand &MCOp) {
    332   const MachineOperand &MO = MI->getOperand(OpNo);
    333 
    334   switch (MI->getOpcode()) {
    335   default: return false;
    336   case NVPTX::TEX_1D_F32_I32:
    337   case NVPTX::TEX_1D_F32_F32:
    338   case NVPTX::TEX_1D_F32_F32_LEVEL:
    339   case NVPTX::TEX_1D_F32_F32_GRAD:
    340   case NVPTX::TEX_1D_I32_I32:
    341   case NVPTX::TEX_1D_I32_F32:
    342   case NVPTX::TEX_1D_I32_F32_LEVEL:
    343   case NVPTX::TEX_1D_I32_F32_GRAD:
    344   case NVPTX::TEX_1D_ARRAY_F32_I32:
    345   case NVPTX::TEX_1D_ARRAY_F32_F32:
    346   case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
    347   case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
    348   case NVPTX::TEX_1D_ARRAY_I32_I32:
    349   case NVPTX::TEX_1D_ARRAY_I32_F32:
    350   case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
    351   case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
    352   case NVPTX::TEX_2D_F32_I32:
    353   case NVPTX::TEX_2D_F32_F32:
    354   case NVPTX::TEX_2D_F32_F32_LEVEL:
    355   case NVPTX::TEX_2D_F32_F32_GRAD:
    356   case NVPTX::TEX_2D_I32_I32:
    357   case NVPTX::TEX_2D_I32_F32:
    358   case NVPTX::TEX_2D_I32_F32_LEVEL:
    359   case NVPTX::TEX_2D_I32_F32_GRAD:
    360   case NVPTX::TEX_2D_ARRAY_F32_I32:
    361   case NVPTX::TEX_2D_ARRAY_F32_F32:
    362   case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
    363   case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
    364   case NVPTX::TEX_2D_ARRAY_I32_I32:
    365   case NVPTX::TEX_2D_ARRAY_I32_F32:
    366   case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
    367   case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
    368   case NVPTX::TEX_3D_F32_I32:
    369   case NVPTX::TEX_3D_F32_F32:
    370   case NVPTX::TEX_3D_F32_F32_LEVEL:
    371   case NVPTX::TEX_3D_F32_F32_GRAD:
    372   case NVPTX::TEX_3D_I32_I32:
    373   case NVPTX::TEX_3D_I32_F32:
    374   case NVPTX::TEX_3D_I32_F32_LEVEL:
    375   case NVPTX::TEX_3D_I32_F32_GRAD:
    376    {
    377     // This is a texture fetch, so operand 4 is a texref and operand 5 is
    378     // a samplerref
    379     if (OpNo == 4) {
    380       lowerImageHandleSymbol(MO.getImm(), MCOp);
    381       return true;
    382     }
    383     if (OpNo == 5) {
    384       lowerImageHandleSymbol(MO.getImm(), MCOp);
    385       return true;
    386     }
    387 
    388     return false;
    389   }
    390   case NVPTX::SULD_1D_I8_TRAP:
    391   case NVPTX::SULD_1D_I16_TRAP:
    392   case NVPTX::SULD_1D_I32_TRAP:
    393   case NVPTX::SULD_1D_ARRAY_I8_TRAP:
    394   case NVPTX::SULD_1D_ARRAY_I16_TRAP:
    395   case NVPTX::SULD_1D_ARRAY_I32_TRAP:
    396   case NVPTX::SULD_2D_I8_TRAP:
    397   case NVPTX::SULD_2D_I16_TRAP:
    398   case NVPTX::SULD_2D_I32_TRAP:
    399   case NVPTX::SULD_2D_ARRAY_I8_TRAP:
    400   case NVPTX::SULD_2D_ARRAY_I16_TRAP:
    401   case NVPTX::SULD_2D_ARRAY_I32_TRAP:
    402   case NVPTX::SULD_3D_I8_TRAP:
    403   case NVPTX::SULD_3D_I16_TRAP:
    404   case NVPTX::SULD_3D_I32_TRAP: {
    405     // This is a V1 surface load, so operand 1 is a surfref
    406     if (OpNo == 1) {
    407       lowerImageHandleSymbol(MO.getImm(), MCOp);
    408       return true;
    409     }
    410 
    411     return false;
    412   }
    413   case NVPTX::SULD_1D_V2I8_TRAP:
    414   case NVPTX::SULD_1D_V2I16_TRAP:
    415   case NVPTX::SULD_1D_V2I32_TRAP:
    416   case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
    417   case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
    418   case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
    419   case NVPTX::SULD_2D_V2I8_TRAP:
    420   case NVPTX::SULD_2D_V2I16_TRAP:
    421   case NVPTX::SULD_2D_V2I32_TRAP:
    422   case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
    423   case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
    424   case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
    425   case NVPTX::SULD_3D_V2I8_TRAP:
    426   case NVPTX::SULD_3D_V2I16_TRAP:
    427   case NVPTX::SULD_3D_V2I32_TRAP: {
    428     // This is a V2 surface load, so operand 2 is a surfref
    429     if (OpNo == 2) {
    430       lowerImageHandleSymbol(MO.getImm(), MCOp);
    431       return true;
    432     }
    433 
    434     return false;
    435   }
    436   case NVPTX::SULD_1D_V4I8_TRAP:
    437   case NVPTX::SULD_1D_V4I16_TRAP:
    438   case NVPTX::SULD_1D_V4I32_TRAP:
    439   case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
    440   case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
    441   case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
    442   case NVPTX::SULD_2D_V4I8_TRAP:
    443   case NVPTX::SULD_2D_V4I16_TRAP:
    444   case NVPTX::SULD_2D_V4I32_TRAP:
    445   case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
    446   case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
    447   case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
    448   case NVPTX::SULD_3D_V4I8_TRAP:
    449   case NVPTX::SULD_3D_V4I16_TRAP:
    450   case NVPTX::SULD_3D_V4I32_TRAP: {
    451     // This is a V4 surface load, so operand 4 is a surfref
    452     if (OpNo == 4) {
    453       lowerImageHandleSymbol(MO.getImm(), MCOp);
    454       return true;
    455     }
    456 
    457     return false;
    458   }
    459   case NVPTX::SUST_B_1D_B8_TRAP:
    460   case NVPTX::SUST_B_1D_B16_TRAP:
    461   case NVPTX::SUST_B_1D_B32_TRAP:
    462   case NVPTX::SUST_B_1D_V2B8_TRAP:
    463   case NVPTX::SUST_B_1D_V2B16_TRAP:
    464   case NVPTX::SUST_B_1D_V2B32_TRAP:
    465   case NVPTX::SUST_B_1D_V4B8_TRAP:
    466   case NVPTX::SUST_B_1D_V4B16_TRAP:
    467   case NVPTX::SUST_B_1D_V4B32_TRAP:
    468   case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
    469   case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
    470   case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
    471   case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
    472   case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
    473   case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
    474   case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
    475   case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
    476   case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
    477   case NVPTX::SUST_B_2D_B8_TRAP:
    478   case NVPTX::SUST_B_2D_B16_TRAP:
    479   case NVPTX::SUST_B_2D_B32_TRAP:
    480   case NVPTX::SUST_B_2D_V2B8_TRAP:
    481   case NVPTX::SUST_B_2D_V2B16_TRAP:
    482   case NVPTX::SUST_B_2D_V2B32_TRAP:
    483   case NVPTX::SUST_B_2D_V4B8_TRAP:
    484   case NVPTX::SUST_B_2D_V4B16_TRAP:
    485   case NVPTX::SUST_B_2D_V4B32_TRAP:
    486   case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
    487   case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
    488   case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
    489   case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
    490   case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
    491   case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
    492   case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
    493   case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
    494   case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
    495   case NVPTX::SUST_B_3D_B8_TRAP:
    496   case NVPTX::SUST_B_3D_B16_TRAP:
    497   case NVPTX::SUST_B_3D_B32_TRAP:
    498   case NVPTX::SUST_B_3D_V2B8_TRAP:
    499   case NVPTX::SUST_B_3D_V2B16_TRAP:
    500   case NVPTX::SUST_B_3D_V2B32_TRAP:
    501   case NVPTX::SUST_B_3D_V4B8_TRAP:
    502   case NVPTX::SUST_B_3D_V4B16_TRAP:
    503   case NVPTX::SUST_B_3D_V4B32_TRAP:
    504   case NVPTX::SUST_P_1D_B8_TRAP:
    505   case NVPTX::SUST_P_1D_B16_TRAP:
    506   case NVPTX::SUST_P_1D_B32_TRAP:
    507   case NVPTX::SUST_P_1D_V2B8_TRAP:
    508   case NVPTX::SUST_P_1D_V2B16_TRAP:
    509   case NVPTX::SUST_P_1D_V2B32_TRAP:
    510   case NVPTX::SUST_P_1D_V4B8_TRAP:
    511   case NVPTX::SUST_P_1D_V4B16_TRAP:
    512   case NVPTX::SUST_P_1D_V4B32_TRAP:
    513   case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
    514   case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
    515   case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
    516   case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
    517   case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
    518   case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
    519   case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
    520   case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
    521   case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
    522   case NVPTX::SUST_P_2D_B8_TRAP:
    523   case NVPTX::SUST_P_2D_B16_TRAP:
    524   case NVPTX::SUST_P_2D_B32_TRAP:
    525   case NVPTX::SUST_P_2D_V2B8_TRAP:
    526   case NVPTX::SUST_P_2D_V2B16_TRAP:
    527   case NVPTX::SUST_P_2D_V2B32_TRAP:
    528   case NVPTX::SUST_P_2D_V4B8_TRAP:
    529   case NVPTX::SUST_P_2D_V4B16_TRAP:
    530   case NVPTX::SUST_P_2D_V4B32_TRAP:
    531   case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
    532   case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
    533   case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
    534   case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
    535   case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
    536   case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
    537   case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
    538   case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
    539   case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
    540   case NVPTX::SUST_P_3D_B8_TRAP:
    541   case NVPTX::SUST_P_3D_B16_TRAP:
    542   case NVPTX::SUST_P_3D_B32_TRAP:
    543   case NVPTX::SUST_P_3D_V2B8_TRAP:
    544   case NVPTX::SUST_P_3D_V2B16_TRAP:
    545   case NVPTX::SUST_P_3D_V2B32_TRAP:
    546   case NVPTX::SUST_P_3D_V4B8_TRAP:
    547   case NVPTX::SUST_P_3D_V4B16_TRAP:
    548   case NVPTX::SUST_P_3D_V4B32_TRAP: {
    549     // This is a surface store, so operand 0 is a surfref
    550     if (OpNo == 0) {
    551       lowerImageHandleSymbol(MO.getImm(), MCOp);
    552       return true;
    553     }
    554 
    555     return false;
    556   }
    557   case NVPTX::TXQ_CHANNEL_ORDER:
    558   case NVPTX::TXQ_CHANNEL_DATA_TYPE:
    559   case NVPTX::TXQ_WIDTH:
    560   case NVPTX::TXQ_HEIGHT:
    561   case NVPTX::TXQ_DEPTH:
    562   case NVPTX::TXQ_ARRAY_SIZE:
    563   case NVPTX::TXQ_NUM_SAMPLES:
    564   case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
    565   case NVPTX::SUQ_CHANNEL_ORDER:
    566   case NVPTX::SUQ_CHANNEL_DATA_TYPE:
    567   case NVPTX::SUQ_WIDTH:
    568   case NVPTX::SUQ_HEIGHT:
    569   case NVPTX::SUQ_DEPTH:
    570   case NVPTX::SUQ_ARRAY_SIZE: {
    571     // This is a query, so operand 1 is a surfref/texref
    572     if (OpNo == 1) {
    573       lowerImageHandleSymbol(MO.getImm(), MCOp);
    574       return true;
    575     }
    576 
    577     return false;
    578   }
    579   }
    580 }
    581 
    582 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
    583   // Ewwww
    584   TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
    585   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
    586   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
    587   const char *Sym = MFI->getImageHandleSymbol(Index);
    588   std::string *SymNamePtr =
    589     nvTM.getManagedStrPool()->getManagedString(Sym);
    590   MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
    591     StringRef(SymNamePtr->c_str())));
    592 }
    593 
    594 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
    595   OutMI.setOpcode(MI->getOpcode());
    596   const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
    597 
    598   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
    599   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
    600     const MachineOperand &MO = MI->getOperand(0);
    601     OutMI.addOperand(GetSymbolRef(
    602       OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
    603     return;
    604   }
    605 
    606   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    607     const MachineOperand &MO = MI->getOperand(i);
    608 
    609     MCOperand MCOp;
    610     if (!ST.hasImageHandles()) {
    611       if (lowerImageHandleOperand(MI, i, MCOp)) {
    612         OutMI.addOperand(MCOp);
    613         continue;
    614       }
    615     }
    616 
    617     if (lowerOperand(MO, MCOp))
    618       OutMI.addOperand(MCOp);
    619   }
    620 }
    621 
    622 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
    623                                    MCOperand &MCOp) {
    624   switch (MO.getType()) {
    625   default: llvm_unreachable("unknown operand type");
    626   case MachineOperand::MO_Register:
    627     MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
    628     break;
    629   case MachineOperand::MO_Immediate:
    630     MCOp = MCOperand::CreateImm(MO.getImm());
    631     break;
    632   case MachineOperand::MO_MachineBasicBlock:
    633     MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
    634         MO.getMBB()->getSymbol(), OutContext));
    635     break;
    636   case MachineOperand::MO_ExternalSymbol:
    637     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
    638     break;
    639   case MachineOperand::MO_GlobalAddress:
    640     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
    641     break;
    642   case MachineOperand::MO_FPImmediate: {
    643     const ConstantFP *Cnt = MO.getFPImm();
    644     APFloat Val = Cnt->getValueAPF();
    645 
    646     switch (Cnt->getType()->getTypeID()) {
    647     default: report_fatal_error("Unsupported FP type"); break;
    648     case Type::FloatTyID:
    649       MCOp = MCOperand::CreateExpr(
    650         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
    651       break;
    652     case Type::DoubleTyID:
    653       MCOp = MCOperand::CreateExpr(
    654         NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
    655       break;
    656     }
    657     break;
    658   }
    659   }
    660   return true;
    661 }
    662 
    663 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
    664   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    665     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    666 
    667     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
    668     unsigned RegNum = RegMap[Reg];
    669 
    670     // Encode the register class in the upper 4 bits
    671     // Must be kept in sync with NVPTXInstPrinter::printRegName
    672     unsigned Ret = 0;
    673     if (RC == &NVPTX::Int1RegsRegClass) {
    674       Ret = (1 << 28);
    675     } else if (RC == &NVPTX::Int16RegsRegClass) {
    676       Ret = (2 << 28);
    677     } else if (RC == &NVPTX::Int32RegsRegClass) {
    678       Ret = (3 << 28);
    679     } else if (RC == &NVPTX::Int64RegsRegClass) {
    680       Ret = (4 << 28);
    681     } else if (RC == &NVPTX::Float32RegsRegClass) {
    682       Ret = (5 << 28);
    683     } else if (RC == &NVPTX::Float64RegsRegClass) {
    684       Ret = (6 << 28);
    685     } else {
    686       report_fatal_error("Bad register class");
    687     }
    688 
    689     // Insert the vreg number
    690     Ret |= (RegNum & 0x0FFFFFFF);
    691     return Ret;
    692   } else {
    693     // Some special-use registers are actually physical registers.
    694     // Encode this as the register class ID of 0 and the real register ID.
    695     return Reg & 0x0FFFFFFF;
    696   }
    697 }
    698 
    699 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
    700   const MCExpr *Expr;
    701   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
    702                                  OutContext);
    703   return MCOperand::CreateExpr(Expr);
    704 }
    705 
    706 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
    707   const DataLayout *TD = TM.getDataLayout();
    708   const TargetLowering *TLI = TM.getTargetLowering();
    709 
    710   Type *Ty = F->getReturnType();
    711 
    712   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
    713 
    714   if (Ty->getTypeID() == Type::VoidTyID)
    715     return;
    716 
    717   O << " (";
    718 
    719   if (isABI) {
    720     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
    721       unsigned size = 0;
    722       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    723         size = ITy->getBitWidth();
    724         if (size < 32)
    725           size = 32;
    726       } else {
    727         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
    728         size = Ty->getPrimitiveSizeInBits();
    729       }
    730 
    731       O << ".param .b" << size << " func_retval0";
    732     } else if (isa<PointerType>(Ty)) {
    733       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
    734         << " func_retval0";
    735     } else {
    736       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
    737         unsigned totalsz = TD->getTypeAllocSize(Ty);
    738         unsigned retAlignment = 0;
    739         if (!llvm::getAlign(*F, 0, retAlignment))
    740           retAlignment = TD->getABITypeAlignment(Ty);
    741         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
    742           << "]";
    743       } else
    744         assert(false && "Unknown return type");
    745     }
    746   } else {
    747     SmallVector<EVT, 16> vtparts;
    748     ComputeValueVTs(*TLI, Ty, vtparts);
    749     unsigned idx = 0;
    750     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
    751       unsigned elems = 1;
    752       EVT elemtype = vtparts[i];
    753       if (vtparts[i].isVector()) {
    754         elems = vtparts[i].getVectorNumElements();
    755         elemtype = vtparts[i].getVectorElementType();
    756       }
    757 
    758       for (unsigned j = 0, je = elems; j != je; ++j) {
    759         unsigned sz = elemtype.getSizeInBits();
    760         if (elemtype.isInteger() && (sz < 32))
    761           sz = 32;
    762         O << ".reg .b" << sz << " func_retval" << idx;
    763         if (j < je - 1)
    764           O << ", ";
    765         ++idx;
    766       }
    767       if (i < e - 1)
    768         O << ", ";
    769     }
    770   }
    771   O << ") ";
    772   return;
    773 }
    774 
    775 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
    776                                         raw_ostream &O) {
    777   const Function *F = MF.getFunction();
    778   printReturnValStr(F, O);
    779 }
    780 
    781 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
    782   SmallString<128> Str;
    783   raw_svector_ostream O(Str);
    784 
    785   if (!GlobalsEmitted) {
    786     emitGlobals(*MF->getFunction()->getParent());
    787     GlobalsEmitted = true;
    788   }
    789 
    790   // Set up
    791   MRI = &MF->getRegInfo();
    792   F = MF->getFunction();
    793   emitLinkageDirective(F, O);
    794   if (llvm::isKernelFunction(*F))
    795     O << ".entry ";
    796   else {
    797     O << ".func ";
    798     printReturnValStr(*MF, O);
    799   }
    800 
    801   O << *CurrentFnSym;
    802 
    803   emitFunctionParamList(*MF, O);
    804 
    805   if (llvm::isKernelFunction(*F))
    806     emitKernelFunctionDirectives(*F, O);
    807 
    808   OutStreamer.EmitRawText(O.str());
    809 
    810   prevDebugLoc = DebugLoc();
    811 }
    812 
    813 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
    814   VRegMapping.clear();
    815   OutStreamer.EmitRawText(StringRef("{\n"));
    816   setAndEmitFunctionVirtualRegisters(*MF);
    817 
    818   SmallString<128> Str;
    819   raw_svector_ostream O(Str);
    820   emitDemotedVars(MF->getFunction(), O);
    821   OutStreamer.EmitRawText(O.str());
    822 }
    823 
    824 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
    825   OutStreamer.EmitRawText(StringRef("}\n"));
    826   VRegMapping.clear();
    827 }
    828 
    829 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
    830   unsigned RegNo = MI->getOperand(0).getReg();
    831   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
    832   if (TRI->isVirtualRegister(RegNo)) {
    833     OutStreamer.AddComment(Twine("implicit-def: ") +
    834                            getVirtualRegisterName(RegNo));
    835   } else {
    836     OutStreamer.AddComment(Twine("implicit-def: ") +
    837                            TM.getRegisterInfo()->getName(RegNo));
    838   }
    839   OutStreamer.AddBlankLine();
    840 }
    841 
    842 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
    843                                                    raw_ostream &O) const {
    844   // If the NVVM IR has some of reqntid* specified, then output
    845   // the reqntid directive, and set the unspecified ones to 1.
    846   // If none of reqntid* is specified, don't output reqntid directive.
    847   unsigned reqntidx, reqntidy, reqntidz;
    848   bool specified = false;
    849   if (llvm::getReqNTIDx(F, reqntidx) == false)
    850     reqntidx = 1;
    851   else
    852     specified = true;
    853   if (llvm::getReqNTIDy(F, reqntidy) == false)
    854     reqntidy = 1;
    855   else
    856     specified = true;
    857   if (llvm::getReqNTIDz(F, reqntidz) == false)
    858     reqntidz = 1;
    859   else
    860     specified = true;
    861 
    862   if (specified)
    863     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
    864       << "\n";
    865 
    866   // If the NVVM IR has some of maxntid* specified, then output
    867   // the maxntid directive, and set the unspecified ones to 1.
    868   // If none of maxntid* is specified, don't output maxntid directive.
    869   unsigned maxntidx, maxntidy, maxntidz;
    870   specified = false;
    871   if (llvm::getMaxNTIDx(F, maxntidx) == false)
    872     maxntidx = 1;
    873   else
    874     specified = true;
    875   if (llvm::getMaxNTIDy(F, maxntidy) == false)
    876     maxntidy = 1;
    877   else
    878     specified = true;
    879   if (llvm::getMaxNTIDz(F, maxntidz) == false)
    880     maxntidz = 1;
    881   else
    882     specified = true;
    883 
    884   if (specified)
    885     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
    886       << "\n";
    887 
    888   unsigned mincta;
    889   if (llvm::getMinCTASm(F, mincta))
    890     O << ".minnctapersm " << mincta << "\n";
    891 }
    892 
    893 std::string
    894 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
    895   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    896 
    897   std::string Name;
    898   raw_string_ostream NameStr(Name);
    899 
    900   VRegRCMap::const_iterator I = VRegMapping.find(RC);
    901   assert(I != VRegMapping.end() && "Bad register class");
    902   const DenseMap<unsigned, unsigned> &RegMap = I->second;
    903 
    904   VRegMap::const_iterator VI = RegMap.find(Reg);
    905   assert(VI != RegMap.end() && "Bad virtual register");
    906   unsigned MappedVR = VI->second;
    907 
    908   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
    909 
    910   NameStr.flush();
    911   return Name;
    912 }
    913 
    914 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
    915                                           raw_ostream &O) {
    916   O << getVirtualRegisterName(vr);
    917 }
    918 
    919 void NVPTXAsmPrinter::printVecModifiedImmediate(
    920     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
    921   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
    922   int Imm = (int) MO.getImm();
    923   if (0 == strcmp(Modifier, "vecelem"))
    924     O << "_" << vecelem[Imm];
    925   else if (0 == strcmp(Modifier, "vecv4comm1")) {
    926     if ((Imm < 0) || (Imm > 3))
    927       O << "//";
    928   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
    929     if ((Imm < 4) || (Imm > 7))
    930       O << "//";
    931   } else if (0 == strcmp(Modifier, "vecv4pos")) {
    932     if (Imm < 0)
    933       Imm = 0;
    934     O << "_" << vecelem[Imm % 4];
    935   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
    936     if ((Imm < 0) || (Imm > 1))
    937       O << "//";
    938   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
    939     if ((Imm < 2) || (Imm > 3))
    940       O << "//";
    941   } else if (0 == strcmp(Modifier, "vecv2pos")) {
    942     if (Imm < 0)
    943       Imm = 0;
    944     O << "_" << vecelem[Imm % 2];
    945   } else
    946     llvm_unreachable("Unknown Modifier on immediate operand");
    947 }
    948 
    949 
    950 
    951 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
    952 
    953   emitLinkageDirective(F, O);
    954   if (llvm::isKernelFunction(*F))
    955     O << ".entry ";
    956   else
    957     O << ".func ";
    958   printReturnValStr(F, O);
    959   O << *getSymbol(F) << "\n";
    960   emitFunctionParamList(F, O);
    961   O << ";\n";
    962 }
    963 
    964 static bool usedInGlobalVarDef(const Constant *C) {
    965   if (!C)
    966     return false;
    967 
    968   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    969     if (GV->getName().str() == "llvm.used")
    970       return false;
    971     return true;
    972   }
    973 
    974   for (const User *U : C->users())
    975     if (const Constant *C = dyn_cast<Constant>(U))
    976       if (usedInGlobalVarDef(C))
    977         return true;
    978 
    979   return false;
    980 }
    981 
    982 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
    983   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
    984     if (othergv->getName().str() == "llvm.used")
    985       return true;
    986   }
    987 
    988   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
    989     if (instr->getParent() && instr->getParent()->getParent()) {
    990       const Function *curFunc = instr->getParent()->getParent();
    991       if (oneFunc && (curFunc != oneFunc))
    992         return false;
    993       oneFunc = curFunc;
    994       return true;
    995     } else
    996       return false;
    997   }
    998 
    999   if (const MDNode *md = dyn_cast<MDNode>(U))
   1000     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
   1001                           (md->getName().str() == "llvm.dbg.sp")))
   1002       return true;
   1003 
   1004   for (const User *UU : U->users())
   1005     if (usedInOneFunc(UU, oneFunc) == false)
   1006       return false;
   1007 
   1008   return true;
   1009 }
   1010 
   1011 /* Find out if a global variable can be demoted to local scope.
   1012  * Currently, this is valid for CUDA shared variables, which have local
   1013  * scope and global lifetime. So the conditions to check are :
   1014  * 1. Is the global variable in shared address space?
   1015  * 2. Does it have internal linkage?
   1016  * 3. Is the global variable referenced only in one function?
   1017  */
   1018 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
   1019   if (gv->hasInternalLinkage() == false)
   1020     return false;
   1021   const PointerType *Pty = gv->getType();
   1022   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
   1023     return false;
   1024 
   1025   const Function *oneFunc = nullptr;
   1026 
   1027   bool flag = usedInOneFunc(gv, oneFunc);
   1028   if (flag == false)
   1029     return false;
   1030   if (!oneFunc)
   1031     return false;
   1032   f = oneFunc;
   1033   return true;
   1034 }
   1035 
   1036 static bool useFuncSeen(const Constant *C,
   1037                         llvm::DenseMap<const Function *, bool> &seenMap) {
   1038   for (const User *U : C->users()) {
   1039     if (const Constant *cu = dyn_cast<Constant>(U)) {
   1040       if (useFuncSeen(cu, seenMap))
   1041         return true;
   1042     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
   1043       const BasicBlock *bb = I->getParent();
   1044       if (!bb)
   1045         continue;
   1046       const Function *caller = bb->getParent();
   1047       if (!caller)
   1048         continue;
   1049       if (seenMap.find(caller) != seenMap.end())
   1050         return true;
   1051     }
   1052   }
   1053   return false;
   1054 }
   1055 
   1056 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
   1057   llvm::DenseMap<const Function *, bool> seenMap;
   1058   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
   1059     const Function *F = FI;
   1060 
   1061     if (F->isDeclaration()) {
   1062       if (F->use_empty())
   1063         continue;
   1064       if (F->getIntrinsicID())
   1065         continue;
   1066       emitDeclaration(F, O);
   1067       continue;
   1068     }
   1069     for (const User *U : F->users()) {
   1070       if (const Constant *C = dyn_cast<Constant>(U)) {
   1071         if (usedInGlobalVarDef(C)) {
   1072           // The use is in the initialization of a global variable
   1073           // that is a function pointer, so print a declaration
   1074           // for the original function
   1075           emitDeclaration(F, O);
   1076           break;
   1077         }
   1078         // Emit a declaration of this function if the function that
   1079         // uses this constant expr has already been seen.
   1080         if (useFuncSeen(C, seenMap)) {
   1081           emitDeclaration(F, O);
   1082           break;
   1083         }
   1084       }
   1085 
   1086       if (!isa<Instruction>(U))
   1087         continue;
   1088       const Instruction *instr = cast<Instruction>(U);
   1089       const BasicBlock *bb = instr->getParent();
   1090       if (!bb)
   1091         continue;
   1092       const Function *caller = bb->getParent();
   1093       if (!caller)
   1094         continue;
   1095 
   1096       // If a caller has already been seen, then the caller is
   1097       // appearing in the module before the callee. so print out
   1098       // a declaration for the callee.
   1099       if (seenMap.find(caller) != seenMap.end()) {
   1100         emitDeclaration(F, O);
   1101         break;
   1102       }
   1103     }
   1104     seenMap[F] = true;
   1105   }
   1106 }
   1107 
   1108 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   1109   DebugInfoFinder DbgFinder;
   1110   DbgFinder.processModule(M);
   1111 
   1112   unsigned i = 1;
   1113   for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
   1114     StringRef Filename(DIUnit.getFilename());
   1115     StringRef Dirname(DIUnit.getDirectory());
   1116     SmallString<128> FullPathName = Dirname;
   1117     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
   1118       sys::path::append(FullPathName, Filename);
   1119       Filename = FullPathName.str();
   1120     }
   1121     if (filenameMap.find(Filename.str()) != filenameMap.end())
   1122       continue;
   1123     filenameMap[Filename.str()] = i;
   1124     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
   1125     ++i;
   1126   }
   1127 
   1128   for (DISubprogram SP : DbgFinder.subprograms()) {
   1129     StringRef Filename(SP.getFilename());
   1130     StringRef Dirname(SP.getDirectory());
   1131     SmallString<128> FullPathName = Dirname;
   1132     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
   1133       sys::path::append(FullPathName, Filename);
   1134       Filename = FullPathName.str();
   1135     }
   1136     if (filenameMap.find(Filename.str()) != filenameMap.end())
   1137       continue;
   1138     filenameMap[Filename.str()] = i;
   1139     ++i;
   1140   }
   1141 }
   1142 
   1143 bool NVPTXAsmPrinter::doInitialization(Module &M) {
   1144 
   1145   SmallString<128> Str1;
   1146   raw_svector_ostream OS1(Str1);
   1147 
   1148   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   1149   MMI->AnalyzeModule(M);
   1150 
   1151   // We need to call the parent's one explicitly.
   1152   //bool Result = AsmPrinter::doInitialization(M);
   1153 
   1154   // Initialize TargetLoweringObjectFile.
   1155   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
   1156       .Initialize(OutContext, TM);
   1157 
   1158   Mang = new Mangler(TM.getDataLayout());
   1159 
   1160   // Emit header before any dwarf directives are emitted below.
   1161   emitHeader(M, OS1);
   1162   OutStreamer.EmitRawText(OS1.str());
   1163 
   1164   // Already commented out
   1165   //bool Result = AsmPrinter::doInitialization(M);
   1166 
   1167   // Emit module-level inline asm if it exists.
   1168   if (!M.getModuleInlineAsm().empty()) {
   1169     OutStreamer.AddComment("Start of file scope inline assembly");
   1170     OutStreamer.AddBlankLine();
   1171     OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
   1172     OutStreamer.AddBlankLine();
   1173     OutStreamer.AddComment("End of file scope inline assembly");
   1174     OutStreamer.AddBlankLine();
   1175   }
   1176 
   1177   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
   1178     recordAndEmitFilenames(M);
   1179 
   1180   GlobalsEmitted = false;
   1181 
   1182   return false; // success
   1183 }
   1184 
   1185 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
   1186   SmallString<128> Str2;
   1187   raw_svector_ostream OS2(Str2);
   1188 
   1189   emitDeclarations(M, OS2);
   1190 
   1191   // As ptxas does not support forward references of globals, we need to first
   1192   // sort the list of module-level globals in def-use order. We visit each
   1193   // global variable in order, and ensure that we emit it *after* its dependent
   1194   // globals. We use a little extra memory maintaining both a set and a list to
   1195   // have fast searches while maintaining a strict ordering.
   1196   SmallVector<const GlobalVariable *, 8> Globals;
   1197   DenseSet<const GlobalVariable *> GVVisited;
   1198   DenseSet<const GlobalVariable *> GVVisiting;
   1199 
   1200   // Visit each global variable, in order
   1201   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
   1202        I != E; ++I)
   1203     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
   1204 
   1205   assert(GVVisited.size() == M.getGlobalList().size() &&
   1206          "Missed a global variable");
   1207   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
   1208 
   1209   // Print out module-level global variables in proper order
   1210   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
   1211     printModuleLevelGV(Globals[i], OS2);
   1212 
   1213   OS2 << '\n';
   1214 
   1215   OutStreamer.EmitRawText(OS2.str());
   1216 }
   1217 
   1218 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
   1219   O << "//\n";
   1220   O << "// Generated by LLVM NVPTX Back-End\n";
   1221   O << "//\n";
   1222   O << "\n";
   1223 
   1224   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
   1225   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
   1226 
   1227   O << ".target ";
   1228   O << nvptxSubtarget.getTargetName();
   1229 
   1230   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
   1231     O << ", texmode_independent";
   1232   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
   1233     if (!nvptxSubtarget.hasDouble())
   1234       O << ", map_f64_to_f32";
   1235   }
   1236 
   1237   if (MAI->doesSupportDebugInformation())
   1238     O << ", debug";
   1239 
   1240   O << "\n";
   1241 
   1242   O << ".address_size ";
   1243   if (nvptxSubtarget.is64Bit())
   1244     O << "64";
   1245   else
   1246     O << "32";
   1247   O << "\n";
   1248 
   1249   O << "\n";
   1250 }
   1251 
   1252 bool NVPTXAsmPrinter::doFinalization(Module &M) {
   1253 
   1254   // If we did not emit any functions, then the global declarations have not
   1255   // yet been emitted.
   1256   if (!GlobalsEmitted) {
   1257     emitGlobals(M);
   1258     GlobalsEmitted = true;
   1259   }
   1260 
   1261   // XXX Temproarily remove global variables so that doFinalization() will not
   1262   // emit them again (global variables are emitted at beginning).
   1263 
   1264   Module::GlobalListType &global_list = M.getGlobalList();
   1265   int i, n = global_list.size();
   1266   GlobalVariable **gv_array = new GlobalVariable *[n];
   1267 
   1268   // first, back-up GlobalVariable in gv_array
   1269   i = 0;
   1270   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
   1271        I != E; ++I)
   1272     gv_array[i++] = &*I;
   1273 
   1274   // second, empty global_list
   1275   while (!global_list.empty())
   1276     global_list.remove(global_list.begin());
   1277 
   1278   // call doFinalization
   1279   bool ret = AsmPrinter::doFinalization(M);
   1280 
   1281   // now we restore global variables
   1282   for (i = 0; i < n; i++)
   1283     global_list.insert(global_list.end(), gv_array[i]);
   1284 
   1285   clearAnnotationCache(&M);
   1286 
   1287   delete[] gv_array;
   1288   return ret;
   1289 
   1290   //bool Result = AsmPrinter::doFinalization(M);
   1291   // Instead of calling the parents doFinalization, we may
   1292   // clone parents doFinalization and customize here.
   1293   // Currently, we if NVISA out the EmitGlobals() in
   1294   // parent's doFinalization, which is too intrusive.
   1295   //
   1296   // Same for the doInitialization.
   1297   //return Result;
   1298 }
   1299 
   1300 // This function emits appropriate linkage directives for
   1301 // functions and global variables.
   1302 //
   1303 // extern function declaration            -> .extern
   1304 // extern function definition             -> .visible
   1305 // external global variable with init     -> .visible
   1306 // external without init                  -> .extern
   1307 // appending                              -> not allowed, assert.
   1308 // for any linkage other than
   1309 // internal, private, linker_private,
   1310 // linker_private_weak, linker_private_weak_def_auto,
   1311 // we emit                                -> .weak.
   1312 
   1313 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
   1314                                            raw_ostream &O) {
   1315   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
   1316     if (V->hasExternalLinkage()) {
   1317       if (isa<GlobalVariable>(V)) {
   1318         const GlobalVariable *GVar = cast<GlobalVariable>(V);
   1319         if (GVar) {
   1320           if (GVar->hasInitializer())
   1321             O << ".visible ";
   1322           else
   1323             O << ".extern ";
   1324         }
   1325       } else if (V->isDeclaration())
   1326         O << ".extern ";
   1327       else
   1328         O << ".visible ";
   1329     } else if (V->hasAppendingLinkage()) {
   1330       std::string msg;
   1331       msg.append("Error: ");
   1332       msg.append("Symbol ");
   1333       if (V->hasName())
   1334         msg.append(V->getName().str());
   1335       msg.append("has unsupported appending linkage type");
   1336       llvm_unreachable(msg.c_str());
   1337     } else if (!V->hasInternalLinkage() &&
   1338                !V->hasPrivateLinkage()) {
   1339       O << ".weak ";
   1340     }
   1341   }
   1342 }
   1343 
   1344 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   1345                                          raw_ostream &O,
   1346                                          bool processDemoted) {
   1347 
   1348   // Skip meta data
   1349   if (GVar->hasSection()) {
   1350     if (GVar->getSection() == StringRef("llvm.metadata"))
   1351       return;
   1352   }
   1353 
   1354   // Skip LLVM intrinsic global variables
   1355   if (GVar->getName().startswith("llvm.") ||
   1356       GVar->getName().startswith("nvvm."))
   1357     return;
   1358 
   1359   const DataLayout *TD = TM.getDataLayout();
   1360 
   1361   // GlobalVariables are always constant pointers themselves.
   1362   const PointerType *PTy = GVar->getType();
   1363   Type *ETy = PTy->getElementType();
   1364 
   1365   if (GVar->hasExternalLinkage()) {
   1366     if (GVar->hasInitializer())
   1367       O << ".visible ";
   1368     else
   1369       O << ".extern ";
   1370   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
   1371              GVar->hasAvailableExternallyLinkage() ||
   1372              GVar->hasCommonLinkage()) {
   1373     O << ".weak ";
   1374   }
   1375 
   1376   if (llvm::isTexture(*GVar)) {
   1377     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
   1378     return;
   1379   }
   1380 
   1381   if (llvm::isSurface(*GVar)) {
   1382     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
   1383     return;
   1384   }
   1385 
   1386   if (GVar->isDeclaration()) {
   1387     // (extern) declarations, no definition or initializer
   1388     // Currently the only known declaration is for an automatic __local
   1389     // (.shared) promoted to global.
   1390     emitPTXGlobalVariable(GVar, O);
   1391     O << ";\n";
   1392     return;
   1393   }
   1394 
   1395   if (llvm::isSampler(*GVar)) {
   1396     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
   1397 
   1398     const Constant *Initializer = nullptr;
   1399     if (GVar->hasInitializer())
   1400       Initializer = GVar->getInitializer();
   1401     const ConstantInt *CI = nullptr;
   1402     if (Initializer)
   1403       CI = dyn_cast<ConstantInt>(Initializer);
   1404     if (CI) {
   1405       unsigned sample = CI->getZExtValue();
   1406 
   1407       O << " = { ";
   1408 
   1409       for (int i = 0,
   1410                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
   1411            i < 3; i++) {
   1412         O << "addr_mode_" << i << " = ";
   1413         switch (addr) {
   1414         case 0:
   1415           O << "wrap";
   1416           break;
   1417         case 1:
   1418           O << "clamp_to_border";
   1419           break;
   1420         case 2:
   1421           O << "clamp_to_edge";
   1422           break;
   1423         case 3:
   1424           O << "wrap";
   1425           break;
   1426         case 4:
   1427           O << "mirror";
   1428           break;
   1429         }
   1430         O << ", ";
   1431       }
   1432       O << "filter_mode = ";
   1433       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
   1434       case 0:
   1435         O << "nearest";
   1436         break;
   1437       case 1:
   1438         O << "linear";
   1439         break;
   1440       case 2:
   1441         llvm_unreachable("Anisotropic filtering is not supported");
   1442       default:
   1443         O << "nearest";
   1444         break;
   1445       }
   1446       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
   1447         O << ", force_unnormalized_coords = 1";
   1448       }
   1449       O << " }";
   1450     }
   1451 
   1452     O << ";\n";
   1453     return;
   1454   }
   1455 
   1456   if (GVar->hasPrivateLinkage()) {
   1457 
   1458     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
   1459       return;
   1460 
   1461     // FIXME - need better way (e.g. Metadata) to avoid generating this global
   1462     if (!strncmp(GVar->getName().data(), "filename", 8))
   1463       return;
   1464     if (GVar->use_empty())
   1465       return;
   1466   }
   1467 
   1468   const Function *demotedFunc = nullptr;
   1469   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
   1470     O << "// " << GVar->getName().str() << " has been demoted\n";
   1471     if (localDecls.find(demotedFunc) != localDecls.end())
   1472       localDecls[demotedFunc].push_back(GVar);
   1473     else {
   1474       std::vector<const GlobalVariable *> temp;
   1475       temp.push_back(GVar);
   1476       localDecls[demotedFunc] = temp;
   1477     }
   1478     return;
   1479   }
   1480 
   1481   O << ".";
   1482   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1483 
   1484   if (isManaged(*GVar)) {
   1485     O << " .attribute(.managed)";
   1486   }
   1487 
   1488   if (GVar->getAlignment() == 0)
   1489     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1490   else
   1491     O << " .align " << GVar->getAlignment();
   1492 
   1493   if (ETy->isSingleValueType()) {
   1494     O << " .";
   1495     // Special case: ABI requires that we use .u8 for predicates
   1496     if (ETy->isIntegerTy(1))
   1497       O << "u8";
   1498     else
   1499       O << getPTXFundamentalTypeStr(ETy, false);
   1500     O << " ";
   1501     O << *getSymbol(GVar);
   1502 
   1503     // Ptx allows variable initilization only for constant and global state
   1504     // spaces.
   1505     if (GVar->hasInitializer()) {
   1506       if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1507           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
   1508         const Constant *Initializer = GVar->getInitializer();
   1509         // 'undef' is treated as there is no value spefied.
   1510         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
   1511           O << " = ";
   1512           printScalarConstant(Initializer, O);
   1513         }
   1514       } else {
   1515         // The frontend adds zero-initializer to variables that don't have an
   1516         // initial value, so skip warning for this case.
   1517         if (!GVar->getInitializer()->isNullValue()) {
   1518           std::string warnMsg = "initial value of '" + GVar->getName().str() +
   1519               "' is not allowed in addrspace(" +
   1520               llvm::utostr_32(PTy->getAddressSpace()) + ")";
   1521           report_fatal_error(warnMsg.c_str());
   1522         }
   1523       }
   1524     }
   1525   } else {
   1526     unsigned int ElementSize = 0;
   1527 
   1528     // Although PTX has direct support for struct type and array type and
   1529     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
   1530     // targets that support these high level field accesses. Structs, arrays
   1531     // and vectors are lowered into arrays of bytes.
   1532     switch (ETy->getTypeID()) {
   1533     case Type::StructTyID:
   1534     case Type::ArrayTyID:
   1535     case Type::VectorTyID:
   1536       ElementSize = TD->getTypeStoreSize(ETy);
   1537       // Ptx allows variable initilization only for constant and
   1538       // global state spaces.
   1539       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1540            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
   1541           GVar->hasInitializer()) {
   1542         const Constant *Initializer = GVar->getInitializer();
   1543         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
   1544           AggBuffer aggBuffer(ElementSize, O, *this);
   1545           bufferAggregateConstant(Initializer, &aggBuffer);
   1546           if (aggBuffer.numSymbols) {
   1547             if (nvptxSubtarget.is64Bit()) {
   1548               O << " .u64 " << *getSymbol(GVar) << "[";
   1549               O << ElementSize / 8;
   1550             } else {
   1551               O << " .u32 " << *getSymbol(GVar) << "[";
   1552               O << ElementSize / 4;
   1553             }
   1554             O << "]";
   1555           } else {
   1556             O << " .b8 " << *getSymbol(GVar) << "[";
   1557             O << ElementSize;
   1558             O << "]";
   1559           }
   1560           O << " = {";
   1561           aggBuffer.print();
   1562           O << "}";
   1563         } else {
   1564           O << " .b8 " << *getSymbol(GVar);
   1565           if (ElementSize) {
   1566             O << "[";
   1567             O << ElementSize;
   1568             O << "]";
   1569           }
   1570         }
   1571       } else {
   1572         O << " .b8 " << *getSymbol(GVar);
   1573         if (ElementSize) {
   1574           O << "[";
   1575           O << ElementSize;
   1576           O << "]";
   1577         }
   1578       }
   1579       break;
   1580     default:
   1581       llvm_unreachable("type not supported yet");
   1582     }
   1583 
   1584   }
   1585   O << ";\n";
   1586 }
   1587 
   1588 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
   1589   if (localDecls.find(f) == localDecls.end())
   1590     return;
   1591 
   1592   std::vector<const GlobalVariable *> &gvars = localDecls[f];
   1593 
   1594   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
   1595     O << "\t// demoted variable\n\t";
   1596     printModuleLevelGV(gvars[i], O, true);
   1597   }
   1598 }
   1599 
   1600 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   1601                                           raw_ostream &O) const {
   1602   switch (AddressSpace) {
   1603   case llvm::ADDRESS_SPACE_LOCAL:
   1604     O << "local";
   1605     break;
   1606   case llvm::ADDRESS_SPACE_GLOBAL:
   1607     O << "global";
   1608     break;
   1609   case llvm::ADDRESS_SPACE_CONST:
   1610     O << "const";
   1611     break;
   1612   case llvm::ADDRESS_SPACE_SHARED:
   1613     O << "shared";
   1614     break;
   1615   default:
   1616     report_fatal_error("Bad address space found while emitting PTX");
   1617     break;
   1618   }
   1619 }
   1620 
   1621 std::string
   1622 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   1623   switch (Ty->getTypeID()) {
   1624   default:
   1625     llvm_unreachable("unexpected type");
   1626     break;
   1627   case Type::IntegerTyID: {
   1628     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
   1629     if (NumBits == 1)
   1630       return "pred";
   1631     else if (NumBits <= 64) {
   1632       std::string name = "u";
   1633       return name + utostr(NumBits);
   1634     } else {
   1635       llvm_unreachable("Integer too large");
   1636       break;
   1637     }
   1638     break;
   1639   }
   1640   case Type::FloatTyID:
   1641     return "f32";
   1642   case Type::DoubleTyID:
   1643     return "f64";
   1644   case Type::PointerTyID:
   1645     if (nvptxSubtarget.is64Bit())
   1646       if (useB4PTR)
   1647         return "b64";
   1648       else
   1649         return "u64";
   1650     else if (useB4PTR)
   1651       return "b32";
   1652     else
   1653       return "u32";
   1654   }
   1655   llvm_unreachable("unexpected type");
   1656   return nullptr;
   1657 }
   1658 
   1659 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
   1660                                             raw_ostream &O) {
   1661 
   1662   const DataLayout *TD = TM.getDataLayout();
   1663 
   1664   // GlobalVariables are always constant pointers themselves.
   1665   const PointerType *PTy = GVar->getType();
   1666   Type *ETy = PTy->getElementType();
   1667 
   1668   O << ".";
   1669   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1670   if (GVar->getAlignment() == 0)
   1671     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1672   else
   1673     O << " .align " << GVar->getAlignment();
   1674 
   1675   if (ETy->isSingleValueType()) {
   1676     O << " .";
   1677     O << getPTXFundamentalTypeStr(ETy);
   1678     O << " ";
   1679     O << *getSymbol(GVar);
   1680     return;
   1681   }
   1682 
   1683   int64_t ElementSize = 0;
   1684 
   1685   // Although PTX has direct support for struct type and array type and LLVM IR
   1686   // is very similar to PTX, the LLVM CodeGen does not support for targets that
   1687   // support these high level field accesses. Structs and arrays are lowered
   1688   // into arrays of bytes.
   1689   switch (ETy->getTypeID()) {
   1690   case Type::StructTyID:
   1691   case Type::ArrayTyID:
   1692   case Type::VectorTyID:
   1693     ElementSize = TD->getTypeStoreSize(ETy);
   1694     O << " .b8 " << *getSymbol(GVar) << "[";
   1695     if (ElementSize) {
   1696       O << itostr(ElementSize);
   1697     }
   1698     O << "]";
   1699     break;
   1700   default:
   1701     llvm_unreachable("type not supported yet");
   1702   }
   1703   return;
   1704 }
   1705 
   1706 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
   1707   if (Ty->isSingleValueType())
   1708     return TD->getPrefTypeAlignment(Ty);
   1709 
   1710   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
   1711   if (ATy)
   1712     return getOpenCLAlignment(TD, ATy->getElementType());
   1713 
   1714   const VectorType *VTy = dyn_cast<VectorType>(Ty);
   1715   if (VTy) {
   1716     Type *ETy = VTy->getElementType();
   1717     unsigned int numE = VTy->getNumElements();
   1718     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
   1719     if (numE == 3)
   1720       return 4 * alignE;
   1721     else
   1722       return numE * alignE;
   1723   }
   1724 
   1725   const StructType *STy = dyn_cast<StructType>(Ty);
   1726   if (STy) {
   1727     unsigned int alignStruct = 1;
   1728     // Go through each element of the struct and find the
   1729     // largest alignment.
   1730     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
   1731       Type *ETy = STy->getElementType(i);
   1732       unsigned int align = getOpenCLAlignment(TD, ETy);
   1733       if (align > alignStruct)
   1734         alignStruct = align;
   1735     }
   1736     return alignStruct;
   1737   }
   1738 
   1739   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
   1740   if (FTy)
   1741     return TD->getPointerPrefAlignment();
   1742   return TD->getPrefTypeAlignment(Ty);
   1743 }
   1744 
   1745 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
   1746                                      int paramIndex, raw_ostream &O) {
   1747   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1748       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
   1749     O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
   1750   else {
   1751     std::string argName = I->getName();
   1752     const char *p = argName.c_str();
   1753     while (*p) {
   1754       if (*p == '.')
   1755         O << "_";
   1756       else
   1757         O << *p;
   1758       p++;
   1759     }
   1760   }
   1761 }
   1762 
   1763 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   1764   Function::const_arg_iterator I, E;
   1765   int i = 0;
   1766 
   1767   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1768       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
   1769     O << *CurrentFnSym << "_param_" << paramIndex;
   1770     return;
   1771   }
   1772 
   1773   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
   1774     if (i == paramIndex) {
   1775       printParamName(I, paramIndex, O);
   1776       return;
   1777     }
   1778   }
   1779   llvm_unreachable("paramIndex out of bound");
   1780 }
   1781 
   1782 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
   1783   const DataLayout *TD = TM.getDataLayout();
   1784   const AttributeSet &PAL = F->getAttributes();
   1785   const TargetLowering *TLI = TM.getTargetLowering();
   1786   Function::const_arg_iterator I, E;
   1787   unsigned paramIndex = 0;
   1788   bool first = true;
   1789   bool isKernelFunc = llvm::isKernelFunction(*F);
   1790   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
   1791   MVT thePointerTy = TLI->getPointerTy();
   1792 
   1793   O << "(\n";
   1794 
   1795   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
   1796     Type *Ty = I->getType();
   1797 
   1798     if (!first)
   1799       O << ",\n";
   1800 
   1801     first = false;
   1802 
   1803     // Handle image/sampler parameters
   1804     if (isKernelFunction(*F)) {
   1805       if (isSampler(*I) || isImage(*I)) {
   1806         if (isImage(*I)) {
   1807           std::string sname = I->getName();
   1808           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
   1809             if (nvptxSubtarget.hasImageHandles())
   1810               O << "\t.param .u64 .ptr .surfref ";
   1811             else
   1812               O << "\t.param .surfref ";
   1813             O << *CurrentFnSym << "_param_" << paramIndex;
   1814           }
   1815           else { // Default image is read_only
   1816             if (nvptxSubtarget.hasImageHandles())
   1817               O << "\t.param .u64 .ptr .texref ";
   1818             else
   1819               O << "\t.param .texref ";
   1820             O << *CurrentFnSym << "_param_" << paramIndex;
   1821           }
   1822         } else {
   1823           if (nvptxSubtarget.hasImageHandles())
   1824             O << "\t.param .u64 .ptr .samplerref ";
   1825           else
   1826             O << "\t.param .samplerref ";
   1827           O << *CurrentFnSym << "_param_" << paramIndex;
   1828         }
   1829         continue;
   1830       }
   1831     }
   1832 
   1833     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
   1834       if (Ty->isAggregateType() || Ty->isVectorTy()) {
   1835         // Just print .param .align <a> .b8 .param[size];
   1836         // <a> = PAL.getparamalignment
   1837         // size = typeallocsize of element type
   1838         unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1839         if (align == 0)
   1840           align = TD->getABITypeAlignment(Ty);
   1841 
   1842         unsigned sz = TD->getTypeAllocSize(Ty);
   1843         O << "\t.param .align " << align << " .b8 ";
   1844         printParamName(I, paramIndex, O);
   1845         O << "[" << sz << "]";
   1846 
   1847         continue;
   1848       }
   1849       // Just a scalar
   1850       const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1851       if (isKernelFunc) {
   1852         if (PTy) {
   1853           // Special handling for pointer arguments to kernel
   1854           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
   1855 
   1856           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
   1857             Type *ETy = PTy->getElementType();
   1858             int addrSpace = PTy->getAddressSpace();
   1859             switch (addrSpace) {
   1860             default:
   1861               O << ".ptr ";
   1862               break;
   1863             case llvm::ADDRESS_SPACE_CONST:
   1864               O << ".ptr .const ";
   1865               break;
   1866             case llvm::ADDRESS_SPACE_SHARED:
   1867               O << ".ptr .shared ";
   1868               break;
   1869             case llvm::ADDRESS_SPACE_GLOBAL:
   1870               O << ".ptr .global ";
   1871               break;
   1872             }
   1873             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
   1874           }
   1875           printParamName(I, paramIndex, O);
   1876           continue;
   1877         }
   1878 
   1879         // non-pointer scalar to kernel func
   1880         O << "\t.param .";
   1881         // Special case: predicate operands become .u8 types
   1882         if (Ty->isIntegerTy(1))
   1883           O << "u8";
   1884         else
   1885           O << getPTXFundamentalTypeStr(Ty);
   1886         O << " ";
   1887         printParamName(I, paramIndex, O);
   1888         continue;
   1889       }
   1890       // Non-kernel function, just print .param .b<size> for ABI
   1891       // and .reg .b<size> for non-ABI
   1892       unsigned sz = 0;
   1893       if (isa<IntegerType>(Ty)) {
   1894         sz = cast<IntegerType>(Ty)->getBitWidth();
   1895         if (sz < 32)
   1896           sz = 32;
   1897       } else if (isa<PointerType>(Ty))
   1898         sz = thePointerTy.getSizeInBits();
   1899       else
   1900         sz = Ty->getPrimitiveSizeInBits();
   1901       if (isABI)
   1902         O << "\t.param .b" << sz << " ";
   1903       else
   1904         O << "\t.reg .b" << sz << " ";
   1905       printParamName(I, paramIndex, O);
   1906       continue;
   1907     }
   1908 
   1909     // param has byVal attribute. So should be a pointer
   1910     const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1911     assert(PTy && "Param with byval attribute should be a pointer type");
   1912     Type *ETy = PTy->getElementType();
   1913 
   1914     if (isABI || isKernelFunc) {
   1915       // Just print .param .align <a> .b8 .param[size];
   1916       // <a> = PAL.getparamalignment
   1917       // size = typeallocsize of element type
   1918       unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1919       if (align == 0)
   1920         align = TD->getABITypeAlignment(ETy);
   1921 
   1922       unsigned sz = TD->getTypeAllocSize(ETy);
   1923       O << "\t.param .align " << align << " .b8 ";
   1924       printParamName(I, paramIndex, O);
   1925       O << "[" << sz << "]";
   1926       continue;
   1927     } else {
   1928       // Split the ETy into constituent parts and
   1929       // print .param .b<size> <name> for each part.
   1930       // Further, if a part is vector, print the above for
   1931       // each vector element.
   1932       SmallVector<EVT, 16> vtparts;
   1933       ComputeValueVTs(*TLI, ETy, vtparts);
   1934       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
   1935         unsigned elems = 1;
   1936         EVT elemtype = vtparts[i];
   1937         if (vtparts[i].isVector()) {
   1938           elems = vtparts[i].getVectorNumElements();
   1939           elemtype = vtparts[i].getVectorElementType();
   1940         }
   1941 
   1942         for (unsigned j = 0, je = elems; j != je; ++j) {
   1943           unsigned sz = elemtype.getSizeInBits();
   1944           if (elemtype.isInteger() && (sz < 32))
   1945             sz = 32;
   1946           O << "\t.reg .b" << sz << " ";
   1947           printParamName(I, paramIndex, O);
   1948           if (j < je - 1)
   1949             O << ",\n";
   1950           ++paramIndex;
   1951         }
   1952         if (i < e - 1)
   1953           O << ",\n";
   1954       }
   1955       --paramIndex;
   1956       continue;
   1957     }
   1958   }
   1959 
   1960   O << "\n)\n";
   1961 }
   1962 
   1963 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   1964                                             raw_ostream &O) {
   1965   const Function *F = MF.getFunction();
   1966   emitFunctionParamList(F, O);
   1967 }
   1968 
   1969 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
   1970     const MachineFunction &MF) {
   1971   SmallString<128> Str;
   1972   raw_svector_ostream O(Str);
   1973 
   1974   // Map the global virtual register number to a register class specific
   1975   // virtual register number starting from 1 with that class.
   1976   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
   1977   //unsigned numRegClasses = TRI->getNumRegClasses();
   1978 
   1979   // Emit the Fake Stack Object
   1980   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1981   int NumBytes = (int) MFI->getStackSize();
   1982   if (NumBytes) {
   1983     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
   1984       << getFunctionNumber() << "[" << NumBytes << "];\n";
   1985     if (nvptxSubtarget.is64Bit()) {
   1986       O << "\t.reg .b64 \t%SP;\n";
   1987       O << "\t.reg .b64 \t%SPL;\n";
   1988     } else {
   1989       O << "\t.reg .b32 \t%SP;\n";
   1990       O << "\t.reg .b32 \t%SPL;\n";
   1991     }
   1992   }
   1993 
   1994   // Go through all virtual registers to establish the mapping between the
   1995   // global virtual
   1996   // register number and the per class virtual register number.
   1997   // We use the per class virtual register number in the ptx output.
   1998   unsigned int numVRs = MRI->getNumVirtRegs();
   1999   for (unsigned i = 0; i < numVRs; i++) {
   2000     unsigned int vr = TRI->index2VirtReg(i);
   2001     const TargetRegisterClass *RC = MRI->getRegClass(vr);
   2002     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   2003     int n = regmap.size();
   2004     regmap.insert(std::make_pair(vr, n + 1));
   2005   }
   2006 
   2007   // Emit register declarations
   2008   // @TODO: Extract out the real register usage
   2009   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
   2010   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
   2011   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
   2012   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
   2013   // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
   2014   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
   2015   // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
   2016 
   2017   // Emit declaration of the virtual registers or 'physical' registers for
   2018   // each register class
   2019   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
   2020     const TargetRegisterClass *RC = TRI->getRegClass(i);
   2021     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   2022     std::string rcname = getNVPTXRegClassName(RC);
   2023     std::string rcStr = getNVPTXRegClassStr(RC);
   2024     int n = regmap.size();
   2025 
   2026     // Only declare those registers that may be used.
   2027     if (n) {
   2028        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
   2029          << ">;\n";
   2030     }
   2031   }
   2032 
   2033   OutStreamer.EmitRawText(O.str());
   2034 }
   2035 
   2036 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
   2037   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
   2038   bool ignored;
   2039   unsigned int numHex;
   2040   const char *lead;
   2041 
   2042   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
   2043     numHex = 8;
   2044     lead = "0f";
   2045     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
   2046   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
   2047     numHex = 16;
   2048     lead = "0d";
   2049     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
   2050   } else
   2051     llvm_unreachable("unsupported fp type");
   2052 
   2053   APInt API = APF.bitcastToAPInt();
   2054   std::string hexstr(utohexstr(API.getZExtValue()));
   2055   O << lead;
   2056   if (hexstr.length() < numHex)
   2057     O << std::string(numHex - hexstr.length(), '0');
   2058   O << utohexstr(API.getZExtValue());
   2059 }
   2060 
   2061 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
   2062   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
   2063     O << CI->getValue();
   2064     return;
   2065   }
   2066   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
   2067     printFPConstant(CFP, O);
   2068     return;
   2069   }
   2070   if (isa<ConstantPointerNull>(CPV)) {
   2071     O << "0";
   2072     return;
   2073   }
   2074   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   2075     PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
   2076     bool IsNonGenericPointer = false;
   2077     if (PTy && PTy->getAddressSpace() != 0) {
   2078       IsNonGenericPointer = true;
   2079     }
   2080     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
   2081       O << "generic(";
   2082       O << *getSymbol(GVar);
   2083       O << ")";
   2084     } else {
   2085       O << *getSymbol(GVar);
   2086     }
   2087     return;
   2088   }
   2089   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   2090     const Value *v = Cexpr->stripPointerCasts();
   2091     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
   2092     bool IsNonGenericPointer = false;
   2093     if (PTy && PTy->getAddressSpace() != 0) {
   2094       IsNonGenericPointer = true;
   2095     }
   2096     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
   2097       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
   2098         O << "generic(";
   2099         O << *getSymbol(GVar);
   2100         O << ")";
   2101       } else {
   2102         O << *getSymbol(GVar);
   2103       }
   2104       return;
   2105     } else {
   2106       O << *LowerConstant(CPV, *this);
   2107       return;
   2108     }
   2109   }
   2110   llvm_unreachable("Not scalar type found in printScalarConstant()");
   2111 }
   2112 
   2113 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   2114                                    AggBuffer *aggBuffer) {
   2115 
   2116   const DataLayout *TD = TM.getDataLayout();
   2117 
   2118   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
   2119     int s = TD->getTypeAllocSize(CPV->getType());
   2120     if (s < Bytes)
   2121       s = Bytes;
   2122     aggBuffer->addZeros(s);
   2123     return;
   2124   }
   2125 
   2126   unsigned char *ptr;
   2127   switch (CPV->getType()->getTypeID()) {
   2128 
   2129   case Type::IntegerTyID: {
   2130     const Type *ETy = CPV->getType();
   2131     if (ETy == Type::getInt8Ty(CPV->getContext())) {
   2132       unsigned char c =
   2133           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   2134       ptr = &c;
   2135       aggBuffer->addBytes(ptr, 1, Bytes);
   2136     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
   2137       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   2138       ptr = (unsigned char *)&int16;
   2139       aggBuffer->addBytes(ptr, 2, Bytes);
   2140     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
   2141       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   2142         int int32 = (int)(constInt->getZExtValue());
   2143         ptr = (unsigned char *)&int32;
   2144         aggBuffer->addBytes(ptr, 4, Bytes);
   2145         break;
   2146       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   2147         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   2148                 ConstantFoldConstantExpression(Cexpr, TD))) {
   2149           int int32 = (int)(constInt->getZExtValue());
   2150           ptr = (unsigned char *)&int32;
   2151           aggBuffer->addBytes(ptr, 4, Bytes);
   2152           break;
   2153         }
   2154         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   2155           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   2156           aggBuffer->addSymbol(v);
   2157           aggBuffer->addZeros(4);
   2158           break;
   2159         }
   2160       }
   2161       llvm_unreachable("unsupported integer const type");
   2162     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
   2163       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   2164         long long int64 = (long long)(constInt->getZExtValue());
   2165         ptr = (unsigned char *)&int64;
   2166         aggBuffer->addBytes(ptr, 8, Bytes);
   2167         break;
   2168       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   2169         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   2170                 ConstantFoldConstantExpression(Cexpr, TD))) {
   2171           long long int64 = (long long)(constInt->getZExtValue());
   2172           ptr = (unsigned char *)&int64;
   2173           aggBuffer->addBytes(ptr, 8, Bytes);
   2174           break;
   2175         }
   2176         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   2177           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   2178           aggBuffer->addSymbol(v);
   2179           aggBuffer->addZeros(8);
   2180           break;
   2181         }
   2182       }
   2183       llvm_unreachable("unsupported integer const type");
   2184     } else
   2185       llvm_unreachable("unsupported integer const type");
   2186     break;
   2187   }
   2188   case Type::FloatTyID:
   2189   case Type::DoubleTyID: {
   2190     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
   2191     const Type *Ty = CFP->getType();
   2192     if (Ty == Type::getFloatTy(CPV->getContext())) {
   2193       float float32 = (float) CFP->getValueAPF().convertToFloat();
   2194       ptr = (unsigned char *)&float32;
   2195       aggBuffer->addBytes(ptr, 4, Bytes);
   2196     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
   2197       double float64 = CFP->getValueAPF().convertToDouble();
   2198       ptr = (unsigned char *)&float64;
   2199       aggBuffer->addBytes(ptr, 8, Bytes);
   2200     } else {
   2201       llvm_unreachable("unsupported fp const type");
   2202     }
   2203     break;
   2204   }
   2205   case Type::PointerTyID: {
   2206     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   2207       aggBuffer->addSymbol(GVar);
   2208     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   2209       const Value *v = Cexpr->stripPointerCasts();
   2210       aggBuffer->addSymbol(v);
   2211     }
   2212     unsigned int s = TD->getTypeAllocSize(CPV->getType());
   2213     aggBuffer->addZeros(s);
   2214     break;
   2215   }
   2216 
   2217   case Type::ArrayTyID:
   2218   case Type::VectorTyID:
   2219   case Type::StructTyID: {
   2220     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
   2221         isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
   2222       int ElementSize = TD->getTypeAllocSize(CPV->getType());
   2223       bufferAggregateConstant(CPV, aggBuffer);
   2224       if (Bytes > ElementSize)
   2225         aggBuffer->addZeros(Bytes - ElementSize);
   2226     } else if (isa<ConstantAggregateZero>(CPV))
   2227       aggBuffer->addZeros(Bytes);
   2228     else
   2229       llvm_unreachable("Unexpected Constant type");
   2230     break;
   2231   }
   2232 
   2233   default:
   2234     llvm_unreachable("unsupported type");
   2235   }
   2236 }
   2237 
   2238 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
   2239                                               AggBuffer *aggBuffer) {
   2240   const DataLayout *TD = TM.getDataLayout();
   2241   int Bytes;
   2242 
   2243   // Old constants
   2244   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
   2245     if (CPV->getNumOperands())
   2246       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
   2247         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
   2248     return;
   2249   }
   2250 
   2251   if (const ConstantDataSequential *CDS =
   2252           dyn_cast<ConstantDataSequential>(CPV)) {
   2253     if (CDS->getNumElements())
   2254       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
   2255         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
   2256                      aggBuffer);
   2257     return;
   2258   }
   2259 
   2260   if (isa<ConstantStruct>(CPV)) {
   2261     if (CPV->getNumOperands()) {
   2262       StructType *ST = cast<StructType>(CPV->getType());
   2263       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
   2264         if (i == (e - 1))
   2265           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
   2266                   TD->getTypeAllocSize(ST) -
   2267                   TD->getStructLayout(ST)->getElementOffset(i);
   2268         else
   2269           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
   2270                   TD->getStructLayout(ST)->getElementOffset(i);
   2271         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
   2272       }
   2273     }
   2274     return;
   2275   }
   2276   llvm_unreachable("unsupported constant type in printAggregateConstant()");
   2277 }
   2278 
   2279 // buildTypeNameMap - Run through symbol table looking for type names.
   2280 //
   2281 
   2282 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
   2283 
   2284   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
   2285 
   2286   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
   2287                                   !PI->second.compare("struct._image2d_t") ||
   2288                                   !PI->second.compare("struct._image3d_t")))
   2289     return true;
   2290 
   2291   return false;
   2292 }
   2293 
   2294 
   2295 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   2296   switch (MI.getOpcode()) {
   2297   default:
   2298     return false;
   2299   case NVPTX::CallArgBeginInst:
   2300   case NVPTX::CallArgEndInst0:
   2301   case NVPTX::CallArgEndInst1:
   2302   case NVPTX::CallArgF32:
   2303   case NVPTX::CallArgF64:
   2304   case NVPTX::CallArgI16:
   2305   case NVPTX::CallArgI32:
   2306   case NVPTX::CallArgI32imm:
   2307   case NVPTX::CallArgI64:
   2308   case NVPTX::CallArgParam:
   2309   case NVPTX::CallVoidInst:
   2310   case NVPTX::CallVoidInstReg:
   2311   case NVPTX::Callseq_End:
   2312   case NVPTX::CallVoidInstReg64:
   2313   case NVPTX::DeclareParamInst:
   2314   case NVPTX::DeclareRetMemInst:
   2315   case NVPTX::DeclareRetRegInst:
   2316   case NVPTX::DeclareRetScalarInst:
   2317   case NVPTX::DeclareScalarParamInst:
   2318   case NVPTX::DeclareScalarRegInst:
   2319   case NVPTX::StoreParamF32:
   2320   case NVPTX::StoreParamF64:
   2321   case NVPTX::StoreParamI16:
   2322   case NVPTX::StoreParamI32:
   2323   case NVPTX::StoreParamI64:
   2324   case NVPTX::StoreParamI8:
   2325   case NVPTX::StoreRetvalF32:
   2326   case NVPTX::StoreRetvalF64:
   2327   case NVPTX::StoreRetvalI16:
   2328   case NVPTX::StoreRetvalI32:
   2329   case NVPTX::StoreRetvalI64:
   2330   case NVPTX::StoreRetvalI8:
   2331   case NVPTX::LastCallArgF32:
   2332   case NVPTX::LastCallArgF64:
   2333   case NVPTX::LastCallArgI16:
   2334   case NVPTX::LastCallArgI32:
   2335   case NVPTX::LastCallArgI32imm:
   2336   case NVPTX::LastCallArgI64:
   2337   case NVPTX::LastCallArgParam:
   2338   case NVPTX::LoadParamMemF32:
   2339   case NVPTX::LoadParamMemF64:
   2340   case NVPTX::LoadParamMemI16:
   2341   case NVPTX::LoadParamMemI32:
   2342   case NVPTX::LoadParamMemI64:
   2343   case NVPTX::LoadParamMemI8:
   2344   case NVPTX::PrototypeInst:
   2345   case NVPTX::DBG_VALUE:
   2346     return true;
   2347   }
   2348   return false;
   2349 }
   2350 
   2351 /// PrintAsmOperand - Print out an operand for an inline asm expression.
   2352 ///
   2353 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   2354                                       unsigned AsmVariant,
   2355                                       const char *ExtraCode, raw_ostream &O) {
   2356   if (ExtraCode && ExtraCode[0]) {
   2357     if (ExtraCode[1] != 0)
   2358       return true; // Unknown modifier.
   2359 
   2360     switch (ExtraCode[0]) {
   2361     default:
   2362       // See if this is a generic print operand
   2363       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
   2364     case 'r':
   2365       break;
   2366     }
   2367   }
   2368 
   2369   printOperand(MI, OpNo, O);
   2370 
   2371   return false;
   2372 }
   2373 
   2374 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
   2375     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
   2376     const char *ExtraCode, raw_ostream &O) {
   2377   if (ExtraCode && ExtraCode[0])
   2378     return true; // Unknown modifier
   2379 
   2380   O << '[';
   2381   printMemOperand(MI, OpNo, O);
   2382   O << ']';
   2383 
   2384   return false;
   2385 }
   2386 
   2387 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   2388                                    raw_ostream &O, const char *Modifier) {
   2389   const MachineOperand &MO = MI->getOperand(opNum);
   2390   switch (MO.getType()) {
   2391   case MachineOperand::MO_Register:
   2392     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
   2393       if (MO.getReg() == NVPTX::VRDepot)
   2394         O << DEPOTNAME << getFunctionNumber();
   2395       else
   2396         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
   2397     } else {
   2398       emitVirtualRegister(MO.getReg(), O);
   2399     }
   2400     return;
   2401 
   2402   case MachineOperand::MO_Immediate:
   2403     if (!Modifier)
   2404       O << MO.getImm();
   2405     else if (strstr(Modifier, "vec") == Modifier)
   2406       printVecModifiedImmediate(MO, Modifier, O);
   2407     else
   2408       llvm_unreachable(
   2409           "Don't know how to handle modifier on immediate operand");
   2410     return;
   2411 
   2412   case MachineOperand::MO_FPImmediate:
   2413     printFPConstant(MO.getFPImm(), O);
   2414     break;
   2415 
   2416   case MachineOperand::MO_GlobalAddress:
   2417     O << *getSymbol(MO.getGlobal());
   2418     break;
   2419 
   2420   case MachineOperand::MO_MachineBasicBlock:
   2421     O << *MO.getMBB()->getSymbol();
   2422     return;
   2423 
   2424   default:
   2425     llvm_unreachable("Operand type not supported.");
   2426   }
   2427 }
   2428 
   2429 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
   2430                                       raw_ostream &O, const char *Modifier) {
   2431   printOperand(MI, opNum, O);
   2432 
   2433   if (Modifier && !strcmp(Modifier, "add")) {
   2434     O << ", ";
   2435     printOperand(MI, opNum + 1, O);
   2436   } else {
   2437     if (MI->getOperand(opNum + 1).isImm() &&
   2438         MI->getOperand(opNum + 1).getImm() == 0)
   2439       return; // don't print ',0' or '+0'
   2440     O << "+";
   2441     printOperand(MI, opNum + 1, O);
   2442   }
   2443 }
   2444 
   2445 
   2446 // Force static initialization.
   2447 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
   2448   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2449   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2450 }
   2451 
   2452 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   2453   std::stringstream temp;
   2454   LineReader *reader = this->getReader(filename.str());
   2455   temp << "\n//";
   2456   temp << filename.str();
   2457   temp << ":";
   2458   temp << line;
   2459   temp << " ";
   2460   temp << reader->readLine(line);
   2461   temp << "\n";
   2462   this->OutStreamer.EmitRawText(Twine(temp.str()));
   2463 }
   2464 
   2465 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
   2466   if (!reader) {
   2467     reader = new LineReader(filename);
   2468   }
   2469 
   2470   if (reader->fileName() != filename) {
   2471     delete reader;
   2472     reader = new LineReader(filename);
   2473   }
   2474 
   2475   return reader;
   2476 }
   2477 
   2478 std::string LineReader::readLine(unsigned lineNum) {
   2479   if (lineNum < theCurLine) {
   2480     theCurLine = 0;
   2481     fstr.seekg(0, std::ios::beg);
   2482   }
   2483   while (theCurLine < lineNum) {
   2484     fstr.getline(buff, 500);
   2485     theCurLine++;
   2486   }
   2487   return buff;
   2488 }
   2489 
   2490 // Force static initialization.
   2491 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
   2492   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2493   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2494 }
   2495