Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a printer that converts from our internal representation
     11 // of machine-dependent LLVM code to NVPTX assembly language.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "NVPTXAsmPrinter.h"
     16 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
     17 #include "NVPTX.h"
     18 #include "NVPTXInstrInfo.h"
     19 #include "NVPTXMCExpr.h"
     20 #include "NVPTXRegisterInfo.h"
     21 #include "NVPTXTargetMachine.h"
     22 #include "NVPTXUtilities.h"
     23 #include "cl_common_defines.h"
     24 #include "llvm/ADT/StringExtras.h"
     25 #include "llvm/Analysis/ConstantFolding.h"
     26 #include "llvm/Assembly/Writer.h"
     27 #include "llvm/CodeGen/Analysis.h"
     28 #include "llvm/CodeGen/MachineFrameInfo.h"
     29 #include "llvm/CodeGen/MachineModuleInfo.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/DebugInfo.h"
     32 #include "llvm/IR/DerivedTypes.h"
     33 #include "llvm/IR/Function.h"
     34 #include "llvm/IR/GlobalVariable.h"
     35 #include "llvm/IR/Module.h"
     36 #include "llvm/IR/Operator.h"
     37 #include "llvm/MC/MCStreamer.h"
     38 #include "llvm/MC/MCSymbol.h"
     39 #include "llvm/Support/CommandLine.h"
     40 #include "llvm/Support/ErrorHandling.h"
     41 #include "llvm/Support/FormattedStream.h"
     42 #include "llvm/Support/Path.h"
     43 #include "llvm/Support/TargetRegistry.h"
     44 #include "llvm/Support/TimeValue.h"
     45 #include "llvm/Target/Mangler.h"
     46 #include "llvm/Target/TargetLoweringObjectFile.h"
     47 #include <sstream>
     48 using namespace llvm;
     49 
     50 bool RegAllocNilUsed = true;
     51 
     52 #define DEPOTNAME "__local_depot"
     53 
     54 static cl::opt<bool>
     55 EmitLineNumbers("nvptx-emit-line-numbers",
     56                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
     57                 cl::init(true));
     58 
     59 namespace llvm { bool InterleaveSrcInPtx = false; }
     60 
     61 static cl::opt<bool, true>
     62 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
     63               cl::desc("NVPTX Specific: Emit source line in ptx file"),
     64               cl::location(llvm::InterleaveSrcInPtx));
     65 
     66 namespace {
     67 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
     68 /// depends.
     69 void DiscoverDependentGlobals(const Value *V,
     70                               DenseSet<const GlobalVariable *> &Globals) {
     71   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     72     Globals.insert(GV);
     73   else {
     74     if (const User *U = dyn_cast<User>(V)) {
     75       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
     76         DiscoverDependentGlobals(U->getOperand(i), Globals);
     77       }
     78     }
     79   }
     80 }
     81 
     82 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
     83 /// instances to be emitted, but only after any dependents have been added
     84 /// first.
     85 void VisitGlobalVariableForEmission(
     86     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
     87     DenseSet<const GlobalVariable *> &Visited,
     88     DenseSet<const GlobalVariable *> &Visiting) {
     89   // Have we already visited this one?
     90   if (Visited.count(GV))
     91     return;
     92 
     93   // Do we have a circular dependency?
     94   if (Visiting.count(GV))
     95     report_fatal_error("Circular dependency found in global variable set");
     96 
     97   // Start visiting this global
     98   Visiting.insert(GV);
     99 
    100   // Make sure we visit all dependents first
    101   DenseSet<const GlobalVariable *> Others;
    102   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    103     DiscoverDependentGlobals(GV->getOperand(i), Others);
    104 
    105   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
    106                                                   E = Others.end();
    107        I != E; ++I)
    108     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
    109 
    110   // Now we can visit ourself
    111   Order.push_back(GV);
    112   Visited.insert(GV);
    113   Visiting.erase(GV);
    114 }
    115 }
    116 
    117 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
    118 // cannot just link to the existing version.
    119 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
    120 ///
    121 using namespace nvptx;
    122 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
    123   MCContext &Ctx = AP.OutContext;
    124 
    125   if (CV->isNullValue() || isa<UndefValue>(CV))
    126     return MCConstantExpr::Create(0, Ctx);
    127 
    128   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    129     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
    130 
    131   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
    132     return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
    133 
    134   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
    135     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
    136 
    137   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
    138   if (CE == 0)
    139     llvm_unreachable("Unknown constant value to lower!");
    140 
    141   switch (CE->getOpcode()) {
    142   default:
    143     // If the code isn't optimized, there may be outstanding folding
    144     // opportunities. Attempt to fold the expression using DataLayout as a
    145     // last resort before giving up.
    146     if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
    147       if (C != CE)
    148         return LowerConstant(C, AP);
    149 
    150     // Otherwise report the problem to the user.
    151     {
    152       std::string S;
    153       raw_string_ostream OS(S);
    154       OS << "Unsupported expression in static initializer: ";
    155       WriteAsOperand(OS, CE, /*PrintType=*/ false,
    156                      !AP.MF ? 0 : AP.MF->getFunction()->getParent());
    157       report_fatal_error(OS.str());
    158     }
    159   case Instruction::GetElementPtr: {
    160     const DataLayout &TD = *AP.TM.getDataLayout();
    161     // Generate a symbolic expression for the byte address
    162     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    163     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
    164 
    165     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
    166     if (!OffsetAI)
    167       return Base;
    168 
    169     int64_t Offset = OffsetAI.getSExtValue();
    170     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
    171                                    Ctx);
    172   }
    173 
    174   case Instruction::Trunc:
    175     // We emit the value and depend on the assembler to truncate the generated
    176     // expression properly.  This is important for differences between
    177     // blockaddress labels.  Since the two labels are in the same function, it
    178     // is reasonable to treat their delta as a 32-bit value.
    179   // FALL THROUGH.
    180   case Instruction::BitCast:
    181     return LowerConstant(CE->getOperand(0), AP);
    182 
    183   case Instruction::IntToPtr: {
    184     const DataLayout &TD = *AP.TM.getDataLayout();
    185     // Handle casts to pointers by changing them into casts to the appropriate
    186     // integer type.  This promotes constant folding and simplifies this code.
    187     Constant *Op = CE->getOperand(0);
    188     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
    189                                       false /*ZExt*/);
    190     return LowerConstant(Op, AP);
    191   }
    192 
    193   case Instruction::PtrToInt: {
    194     const DataLayout &TD = *AP.TM.getDataLayout();
    195     // Support only foldable casts to/from pointers that can be eliminated by
    196     // changing the pointer to the appropriately sized integer type.
    197     Constant *Op = CE->getOperand(0);
    198     Type *Ty = CE->getType();
    199 
    200     const MCExpr *OpExpr = LowerConstant(Op, AP);
    201 
    202     // We can emit the pointer value into this slot if the slot is an
    203     // integer slot equal to the size of the pointer.
    204     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
    205       return OpExpr;
    206 
    207     // Otherwise the pointer is smaller than the resultant integer, mask off
    208     // the high bits so we are sure to get a proper truncation if the input is
    209     // a constant expr.
    210     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
    211     const MCExpr *MaskExpr =
    212         MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
    213     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
    214   }
    215 
    216     // The MC library also has a right-shift operator, but it isn't consistently
    217   // signed or unsigned between different targets.
    218   case Instruction::Add:
    219   case Instruction::Sub:
    220   case Instruction::Mul:
    221   case Instruction::SDiv:
    222   case Instruction::SRem:
    223   case Instruction::Shl:
    224   case Instruction::And:
    225   case Instruction::Or:
    226   case Instruction::Xor: {
    227     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
    228     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
    229     switch (CE->getOpcode()) {
    230     default:
    231       llvm_unreachable("Unknown binary operator constant cast expr");
    232     case Instruction::Add:
    233       return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
    234     case Instruction::Sub:
    235       return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
    236     case Instruction::Mul:
    237       return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
    238     case Instruction::SDiv:
    239       return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
    240     case Instruction::SRem:
    241       return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
    242     case Instruction::Shl:
    243       return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
    244     case Instruction::And:
    245       return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
    246     case Instruction::Or:
    247       return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
    248     case Instruction::Xor:
    249       return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
    250     }
    251   }
    252   }
    253 }
    254 
    255 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
    256   if (!EmitLineNumbers)
    257     return;
    258   if (ignoreLoc(MI))
    259     return;
    260 
    261   DebugLoc curLoc = MI.getDebugLoc();
    262 
    263   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
    264     return;
    265 
    266   if (prevDebugLoc == curLoc)
    267     return;
    268 
    269   prevDebugLoc = curLoc;
    270 
    271   if (curLoc.isUnknown())
    272     return;
    273 
    274   const MachineFunction *MF = MI.getParent()->getParent();
    275   //const TargetMachine &TM = MF->getTarget();
    276 
    277   const LLVMContext &ctx = MF->getFunction()->getContext();
    278   DIScope Scope(curLoc.getScope(ctx));
    279 
    280   assert((!Scope || Scope.isScope()) &&
    281     "Scope of a DebugLoc should be null or a DIScope.");
    282   if (!Scope)
    283      return;
    284 
    285   StringRef fileName(Scope.getFilename());
    286   StringRef dirName(Scope.getDirectory());
    287   SmallString<128> FullPathName = dirName;
    288   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    289     sys::path::append(FullPathName, fileName);
    290     fileName = FullPathName.str();
    291   }
    292 
    293   if (filenameMap.find(fileName.str()) == filenameMap.end())
    294     return;
    295 
    296   // Emit the line from the source file.
    297   if (llvm::InterleaveSrcInPtx)
    298     this->emitSrcInText(fileName.str(), curLoc.getLine());
    299 
    300   std::stringstream temp;
    301   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
    302        << " " << curLoc.getCol();
    303   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
    304 }
    305 
    306 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    307   SmallString<128> Str;
    308   raw_svector_ostream OS(Str);
    309   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    310     emitLineNumberAsDotLoc(*MI);
    311 
    312   MCInst Inst;
    313   lowerToMCInst(MI, Inst);
    314   OutStreamer.EmitInstruction(Inst);
    315 }
    316 
    317 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
    318   OutMI.setOpcode(MI->getOpcode());
    319 
    320   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    321     const MachineOperand &MO = MI->getOperand(i);
    322 
    323     MCOperand MCOp;
    324     if (lowerOperand(MO, MCOp))
    325       OutMI.addOperand(MCOp);
    326   }
    327 }
    328 
    329 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
    330                                    MCOperand &MCOp) {
    331   switch (MO.getType()) {
    332   default: llvm_unreachable("unknown operand type");
    333   case MachineOperand::MO_Register:
    334     MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
    335     break;
    336   case MachineOperand::MO_Immediate:
    337     MCOp = MCOperand::CreateImm(MO.getImm());
    338     break;
    339   case MachineOperand::MO_MachineBasicBlock:
    340     MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
    341         MO.getMBB()->getSymbol(), OutContext));
    342     break;
    343   case MachineOperand::MO_ExternalSymbol:
    344     MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
    345     break;
    346   case MachineOperand::MO_GlobalAddress:
    347     MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
    348     break;
    349   case MachineOperand::MO_FPImmediate: {
    350     const ConstantFP *Cnt = MO.getFPImm();
    351     APFloat Val = Cnt->getValueAPF();
    352 
    353     switch (Cnt->getType()->getTypeID()) {
    354     default: report_fatal_error("Unsupported FP type"); break;
    355     case Type::FloatTyID:
    356       MCOp = MCOperand::CreateExpr(
    357         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
    358       break;
    359     case Type::DoubleTyID:
    360       MCOp = MCOperand::CreateExpr(
    361         NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
    362       break;
    363     }
    364     break;
    365   }
    366   }
    367   return true;
    368 }
    369 
    370 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
    371   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    372     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    373 
    374     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
    375     unsigned RegNum = RegMap[Reg];
    376 
    377     // Encode the register class in the upper 4 bits
    378     // Must be kept in sync with NVPTXInstPrinter::printRegName
    379     unsigned Ret = 0;
    380     if (RC == &NVPTX::Int1RegsRegClass) {
    381       Ret = (1 << 28);
    382     } else if (RC == &NVPTX::Int16RegsRegClass) {
    383       Ret = (2 << 28);
    384     } else if (RC == &NVPTX::Int32RegsRegClass) {
    385       Ret = (3 << 28);
    386     } else if (RC == &NVPTX::Int64RegsRegClass) {
    387       Ret = (4 << 28);
    388     } else if (RC == &NVPTX::Float32RegsRegClass) {
    389       Ret = (5 << 28);
    390     } else if (RC == &NVPTX::Float64RegsRegClass) {
    391       Ret = (6 << 28);
    392     } else {
    393       report_fatal_error("Bad register class");
    394     }
    395 
    396     // Insert the vreg number
    397     Ret |= (RegNum & 0x0FFFFFFF);
    398     return Ret;
    399   } else {
    400     // Some special-use registers are actually physical registers.
    401     // Encode this as the register class ID of 0 and the real register ID.
    402     return Reg & 0x0FFFFFFF;
    403   }
    404 }
    405 
    406 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
    407                                         const MCSymbol *Symbol) {
    408   const MCExpr *Expr;
    409   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
    410                                  OutContext);
    411   return MCOperand::CreateExpr(Expr);
    412 }
    413 
    414 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
    415   const DataLayout *TD = TM.getDataLayout();
    416   const TargetLowering *TLI = TM.getTargetLowering();
    417 
    418   Type *Ty = F->getReturnType();
    419 
    420   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
    421 
    422   if (Ty->getTypeID() == Type::VoidTyID)
    423     return;
    424 
    425   O << " (";
    426 
    427   if (isABI) {
    428     if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
    429       unsigned size = 0;
    430       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    431         size = ITy->getBitWidth();
    432         if (size < 32)
    433           size = 32;
    434       } else {
    435         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
    436         size = Ty->getPrimitiveSizeInBits();
    437       }
    438 
    439       O << ".param .b" << size << " func_retval0";
    440     } else if (isa<PointerType>(Ty)) {
    441       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
    442         << " func_retval0";
    443     } else {
    444       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
    445         SmallVector<EVT, 16> vtparts;
    446         ComputeValueVTs(*TLI, Ty, vtparts);
    447         unsigned totalsz = 0;
    448         for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
    449           unsigned elems = 1;
    450           EVT elemtype = vtparts[i];
    451           if (vtparts[i].isVector()) {
    452             elems = vtparts[i].getVectorNumElements();
    453             elemtype = vtparts[i].getVectorElementType();
    454           }
    455           for (unsigned j = 0, je = elems; j != je; ++j) {
    456             unsigned sz = elemtype.getSizeInBits();
    457             if (elemtype.isInteger() && (sz < 8))
    458               sz = 8;
    459             totalsz += sz / 8;
    460           }
    461         }
    462         unsigned retAlignment = 0;
    463         if (!llvm::getAlign(*F, 0, retAlignment))
    464           retAlignment = TD->getABITypeAlignment(Ty);
    465         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
    466           << "]";
    467       } else
    468         assert(false && "Unknown return type");
    469     }
    470   } else {
    471     SmallVector<EVT, 16> vtparts;
    472     ComputeValueVTs(*TLI, Ty, vtparts);
    473     unsigned idx = 0;
    474     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
    475       unsigned elems = 1;
    476       EVT elemtype = vtparts[i];
    477       if (vtparts[i].isVector()) {
    478         elems = vtparts[i].getVectorNumElements();
    479         elemtype = vtparts[i].getVectorElementType();
    480       }
    481 
    482       for (unsigned j = 0, je = elems; j != je; ++j) {
    483         unsigned sz = elemtype.getSizeInBits();
    484         if (elemtype.isInteger() && (sz < 32))
    485           sz = 32;
    486         O << ".reg .b" << sz << " func_retval" << idx;
    487         if (j < je - 1)
    488           O << ", ";
    489         ++idx;
    490       }
    491       if (i < e - 1)
    492         O << ", ";
    493     }
    494   }
    495   O << ") ";
    496   return;
    497 }
    498 
    499 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
    500                                         raw_ostream &O) {
    501   const Function *F = MF.getFunction();
    502   printReturnValStr(F, O);
    503 }
    504 
    505 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
    506   SmallString<128> Str;
    507   raw_svector_ostream O(Str);
    508 
    509   if (!GlobalsEmitted) {
    510     emitGlobals(*MF->getFunction()->getParent());
    511     GlobalsEmitted = true;
    512   }
    513 
    514   // Set up
    515   MRI = &MF->getRegInfo();
    516   F = MF->getFunction();
    517   emitLinkageDirective(F, O);
    518   if (llvm::isKernelFunction(*F))
    519     O << ".entry ";
    520   else {
    521     O << ".func ";
    522     printReturnValStr(*MF, O);
    523   }
    524 
    525   O << *CurrentFnSym;
    526 
    527   emitFunctionParamList(*MF, O);
    528 
    529   if (llvm::isKernelFunction(*F))
    530     emitKernelFunctionDirectives(*F, O);
    531 
    532   OutStreamer.EmitRawText(O.str());
    533 
    534   prevDebugLoc = DebugLoc();
    535 }
    536 
    537 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
    538   VRegMapping.clear();
    539   OutStreamer.EmitRawText(StringRef("{\n"));
    540   setAndEmitFunctionVirtualRegisters(*MF);
    541 
    542   SmallString<128> Str;
    543   raw_svector_ostream O(Str);
    544   emitDemotedVars(MF->getFunction(), O);
    545   OutStreamer.EmitRawText(O.str());
    546 }
    547 
    548 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
    549   OutStreamer.EmitRawText(StringRef("}\n"));
    550   VRegMapping.clear();
    551 }
    552 
    553 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
    554                                                    raw_ostream &O) const {
    555   // If the NVVM IR has some of reqntid* specified, then output
    556   // the reqntid directive, and set the unspecified ones to 1.
    557   // If none of reqntid* is specified, don't output reqntid directive.
    558   unsigned reqntidx, reqntidy, reqntidz;
    559   bool specified = false;
    560   if (llvm::getReqNTIDx(F, reqntidx) == false)
    561     reqntidx = 1;
    562   else
    563     specified = true;
    564   if (llvm::getReqNTIDy(F, reqntidy) == false)
    565     reqntidy = 1;
    566   else
    567     specified = true;
    568   if (llvm::getReqNTIDz(F, reqntidz) == false)
    569     reqntidz = 1;
    570   else
    571     specified = true;
    572 
    573   if (specified)
    574     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
    575       << "\n";
    576 
    577   // If the NVVM IR has some of maxntid* specified, then output
    578   // the maxntid directive, and set the unspecified ones to 1.
    579   // If none of maxntid* is specified, don't output maxntid directive.
    580   unsigned maxntidx, maxntidy, maxntidz;
    581   specified = false;
    582   if (llvm::getMaxNTIDx(F, maxntidx) == false)
    583     maxntidx = 1;
    584   else
    585     specified = true;
    586   if (llvm::getMaxNTIDy(F, maxntidy) == false)
    587     maxntidy = 1;
    588   else
    589     specified = true;
    590   if (llvm::getMaxNTIDz(F, maxntidz) == false)
    591     maxntidz = 1;
    592   else
    593     specified = true;
    594 
    595   if (specified)
    596     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
    597       << "\n";
    598 
    599   unsigned mincta;
    600   if (llvm::getMinCTASm(F, mincta))
    601     O << ".minnctapersm " << mincta << "\n";
    602 }
    603 
    604 void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
    605                                              raw_ostream &O) {
    606   const TargetRegisterClass *RC = MRI->getRegClass(vr);
    607 
    608   DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    609   unsigned mapped_vr = regmap[vr];
    610 
    611   if (!isVec) {
    612     O << getNVPTXRegClassStr(RC) << mapped_vr;
    613     return;
    614   }
    615   report_fatal_error("Bad register!");
    616 }
    617 
    618 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
    619                                           raw_ostream &O) {
    620   getVirtualRegisterName(vr, isVec, O);
    621 }
    622 
    623 void NVPTXAsmPrinter::printVecModifiedImmediate(
    624     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
    625   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
    626   int Imm = (int) MO.getImm();
    627   if (0 == strcmp(Modifier, "vecelem"))
    628     O << "_" << vecelem[Imm];
    629   else if (0 == strcmp(Modifier, "vecv4comm1")) {
    630     if ((Imm < 0) || (Imm > 3))
    631       O << "//";
    632   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
    633     if ((Imm < 4) || (Imm > 7))
    634       O << "//";
    635   } else if (0 == strcmp(Modifier, "vecv4pos")) {
    636     if (Imm < 0)
    637       Imm = 0;
    638     O << "_" << vecelem[Imm % 4];
    639   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
    640     if ((Imm < 0) || (Imm > 1))
    641       O << "//";
    642   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
    643     if ((Imm < 2) || (Imm > 3))
    644       O << "//";
    645   } else if (0 == strcmp(Modifier, "vecv2pos")) {
    646     if (Imm < 0)
    647       Imm = 0;
    648     O << "_" << vecelem[Imm % 2];
    649   } else
    650     llvm_unreachable("Unknown Modifier on immediate operand");
    651 }
    652 
    653 
    654 
    655 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
    656 
    657   emitLinkageDirective(F, O);
    658   if (llvm::isKernelFunction(*F))
    659     O << ".entry ";
    660   else
    661     O << ".func ";
    662   printReturnValStr(F, O);
    663   O << *Mang->getSymbol(F) << "\n";
    664   emitFunctionParamList(F, O);
    665   O << ";\n";
    666 }
    667 
    668 static bool usedInGlobalVarDef(const Constant *C) {
    669   if (!C)
    670     return false;
    671 
    672   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    673     if (GV->getName().str() == "llvm.used")
    674       return false;
    675     return true;
    676   }
    677 
    678   for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
    679        ui != ue; ++ui) {
    680     const Constant *C = dyn_cast<Constant>(*ui);
    681     if (usedInGlobalVarDef(C))
    682       return true;
    683   }
    684   return false;
    685 }
    686 
    687 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
    688   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
    689     if (othergv->getName().str() == "llvm.used")
    690       return true;
    691   }
    692 
    693   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
    694     if (instr->getParent() && instr->getParent()->getParent()) {
    695       const Function *curFunc = instr->getParent()->getParent();
    696       if (oneFunc && (curFunc != oneFunc))
    697         return false;
    698       oneFunc = curFunc;
    699       return true;
    700     } else
    701       return false;
    702   }
    703 
    704   if (const MDNode *md = dyn_cast<MDNode>(U))
    705     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
    706                           (md->getName().str() == "llvm.dbg.sp")))
    707       return true;
    708 
    709   for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
    710        ui != ue; ++ui) {
    711     if (usedInOneFunc(*ui, oneFunc) == false)
    712       return false;
    713   }
    714   return true;
    715 }
    716 
    717 /* Find out if a global variable can be demoted to local scope.
    718  * Currently, this is valid for CUDA shared variables, which have local
    719  * scope and global lifetime. So the conditions to check are :
    720  * 1. Is the global variable in shared address space?
    721  * 2. Does it have internal linkage?
    722  * 3. Is the global variable referenced only in one function?
    723  */
    724 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
    725   if (gv->hasInternalLinkage() == false)
    726     return false;
    727   const PointerType *Pty = gv->getType();
    728   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
    729     return false;
    730 
    731   const Function *oneFunc = 0;
    732 
    733   bool flag = usedInOneFunc(gv, oneFunc);
    734   if (flag == false)
    735     return false;
    736   if (!oneFunc)
    737     return false;
    738   f = oneFunc;
    739   return true;
    740 }
    741 
    742 static bool useFuncSeen(const Constant *C,
    743                         llvm::DenseMap<const Function *, bool> &seenMap) {
    744   for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
    745        ui != ue; ++ui) {
    746     if (const Constant *cu = dyn_cast<Constant>(*ui)) {
    747       if (useFuncSeen(cu, seenMap))
    748         return true;
    749     } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
    750       const BasicBlock *bb = I->getParent();
    751       if (!bb)
    752         continue;
    753       const Function *caller = bb->getParent();
    754       if (!caller)
    755         continue;
    756       if (seenMap.find(caller) != seenMap.end())
    757         return true;
    758     }
    759   }
    760   return false;
    761 }
    762 
    763 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
    764   llvm::DenseMap<const Function *, bool> seenMap;
    765   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
    766     const Function *F = FI;
    767 
    768     if (F->isDeclaration()) {
    769       if (F->use_empty())
    770         continue;
    771       if (F->getIntrinsicID())
    772         continue;
    773       emitDeclaration(F, O);
    774       continue;
    775     }
    776     for (Value::const_use_iterator iter = F->use_begin(),
    777                                    iterEnd = F->use_end();
    778          iter != iterEnd; ++iter) {
    779       if (const Constant *C = dyn_cast<Constant>(*iter)) {
    780         if (usedInGlobalVarDef(C)) {
    781           // The use is in the initialization of a global variable
    782           // that is a function pointer, so print a declaration
    783           // for the original function
    784           emitDeclaration(F, O);
    785           break;
    786         }
    787         // Emit a declaration of this function if the function that
    788         // uses this constant expr has already been seen.
    789         if (useFuncSeen(C, seenMap)) {
    790           emitDeclaration(F, O);
    791           break;
    792         }
    793       }
    794 
    795       if (!isa<Instruction>(*iter))
    796         continue;
    797       const Instruction *instr = cast<Instruction>(*iter);
    798       const BasicBlock *bb = instr->getParent();
    799       if (!bb)
    800         continue;
    801       const Function *caller = bb->getParent();
    802       if (!caller)
    803         continue;
    804 
    805       // If a caller has already been seen, then the caller is
    806       // appearing in the module before the callee. so print out
    807       // a declaration for the callee.
    808       if (seenMap.find(caller) != seenMap.end()) {
    809         emitDeclaration(F, O);
    810         break;
    811       }
    812     }
    813     seenMap[F] = true;
    814   }
    815 }
    816 
    817 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
    818   DebugInfoFinder DbgFinder;
    819   DbgFinder.processModule(M);
    820 
    821   unsigned i = 1;
    822   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
    823                                  E = DbgFinder.compile_unit_end();
    824        I != E; ++I) {
    825     DICompileUnit DIUnit(*I);
    826     StringRef Filename(DIUnit.getFilename());
    827     StringRef Dirname(DIUnit.getDirectory());
    828     SmallString<128> FullPathName = Dirname;
    829     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    830       sys::path::append(FullPathName, Filename);
    831       Filename = FullPathName.str();
    832     }
    833     if (filenameMap.find(Filename.str()) != filenameMap.end())
    834       continue;
    835     filenameMap[Filename.str()] = i;
    836     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
    837     ++i;
    838   }
    839 
    840   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
    841                                  E = DbgFinder.subprogram_end();
    842        I != E; ++I) {
    843     DISubprogram SP(*I);
    844     StringRef Filename(SP.getFilename());
    845     StringRef Dirname(SP.getDirectory());
    846     SmallString<128> FullPathName = Dirname;
    847     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    848       sys::path::append(FullPathName, Filename);
    849       Filename = FullPathName.str();
    850     }
    851     if (filenameMap.find(Filename.str()) != filenameMap.end())
    852       continue;
    853     filenameMap[Filename.str()] = i;
    854     ++i;
    855   }
    856 }
    857 
    858 bool NVPTXAsmPrinter::doInitialization(Module &M) {
    859 
    860   SmallString<128> Str1;
    861   raw_svector_ostream OS1(Str1);
    862 
    863   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
    864   MMI->AnalyzeModule(M);
    865 
    866   // We need to call the parent's one explicitly.
    867   //bool Result = AsmPrinter::doInitialization(M);
    868 
    869   // Initialize TargetLoweringObjectFile.
    870   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
    871       .Initialize(OutContext, TM);
    872 
    873   Mang = new Mangler(OutContext, &TM);
    874 
    875   // Emit header before any dwarf directives are emitted below.
    876   emitHeader(M, OS1);
    877   OutStreamer.EmitRawText(OS1.str());
    878 
    879   // Already commented out
    880   //bool Result = AsmPrinter::doInitialization(M);
    881 
    882   // Emit module-level inline asm if it exists.
    883   if (!M.getModuleInlineAsm().empty()) {
    884     OutStreamer.AddComment("Start of file scope inline assembly");
    885     OutStreamer.AddBlankLine();
    886     OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
    887     OutStreamer.AddBlankLine();
    888     OutStreamer.AddComment("End of file scope inline assembly");
    889     OutStreamer.AddBlankLine();
    890   }
    891 
    892   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    893     recordAndEmitFilenames(M);
    894 
    895   GlobalsEmitted = false;
    896 
    897   return false; // success
    898 }
    899 
    900 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
    901   SmallString<128> Str2;
    902   raw_svector_ostream OS2(Str2);
    903 
    904   emitDeclarations(M, OS2);
    905 
    906   // As ptxas does not support forward references of globals, we need to first
    907   // sort the list of module-level globals in def-use order. We visit each
    908   // global variable in order, and ensure that we emit it *after* its dependent
    909   // globals. We use a little extra memory maintaining both a set and a list to
    910   // have fast searches while maintaining a strict ordering.
    911   SmallVector<const GlobalVariable *, 8> Globals;
    912   DenseSet<const GlobalVariable *> GVVisited;
    913   DenseSet<const GlobalVariable *> GVVisiting;
    914 
    915   // Visit each global variable, in order
    916   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
    917        I != E; ++I)
    918     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
    919 
    920   assert(GVVisited.size() == M.getGlobalList().size() &&
    921          "Missed a global variable");
    922   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
    923 
    924   // Print out module-level global variables in proper order
    925   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
    926     printModuleLevelGV(Globals[i], OS2);
    927 
    928   OS2 << '\n';
    929 
    930   OutStreamer.EmitRawText(OS2.str());
    931 }
    932 
    933 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
    934   O << "//\n";
    935   O << "// Generated by LLVM NVPTX Back-End\n";
    936   O << "//\n";
    937   O << "\n";
    938 
    939   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
    940   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
    941 
    942   O << ".target ";
    943   O << nvptxSubtarget.getTargetName();
    944 
    945   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
    946     O << ", texmode_independent";
    947   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
    948     if (!nvptxSubtarget.hasDouble())
    949       O << ", map_f64_to_f32";
    950   }
    951 
    952   if (MAI->doesSupportDebugInformation())
    953     O << ", debug";
    954 
    955   O << "\n";
    956 
    957   O << ".address_size ";
    958   if (nvptxSubtarget.is64Bit())
    959     O << "64";
    960   else
    961     O << "32";
    962   O << "\n";
    963 
    964   O << "\n";
    965 }
    966 
    967 bool NVPTXAsmPrinter::doFinalization(Module &M) {
    968 
    969   // If we did not emit any functions, then the global declarations have not
    970   // yet been emitted.
    971   if (!GlobalsEmitted) {
    972     emitGlobals(M);
    973     GlobalsEmitted = true;
    974   }
    975 
    976   // XXX Temproarily remove global variables so that doFinalization() will not
    977   // emit them again (global variables are emitted at beginning).
    978 
    979   Module::GlobalListType &global_list = M.getGlobalList();
    980   int i, n = global_list.size();
    981   GlobalVariable **gv_array = new GlobalVariable *[n];
    982 
    983   // first, back-up GlobalVariable in gv_array
    984   i = 0;
    985   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
    986        I != E; ++I)
    987     gv_array[i++] = &*I;
    988 
    989   // second, empty global_list
    990   while (!global_list.empty())
    991     global_list.remove(global_list.begin());
    992 
    993   // call doFinalization
    994   bool ret = AsmPrinter::doFinalization(M);
    995 
    996   // now we restore global variables
    997   for (i = 0; i < n; i++)
    998     global_list.insert(global_list.end(), gv_array[i]);
    999 
   1000   delete[] gv_array;
   1001   return ret;
   1002 
   1003   //bool Result = AsmPrinter::doFinalization(M);
   1004   // Instead of calling the parents doFinalization, we may
   1005   // clone parents doFinalization and customize here.
   1006   // Currently, we if NVISA out the EmitGlobals() in
   1007   // parent's doFinalization, which is too intrusive.
   1008   //
   1009   // Same for the doInitialization.
   1010   //return Result;
   1011 }
   1012 
   1013 // This function emits appropriate linkage directives for
   1014 // functions and global variables.
   1015 //
   1016 // extern function declaration            -> .extern
   1017 // extern function definition             -> .visible
   1018 // external global variable with init     -> .visible
   1019 // external without init                  -> .extern
   1020 // appending                              -> not allowed, assert.
   1021 
   1022 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
   1023                                            raw_ostream &O) {
   1024   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
   1025     if (V->hasExternalLinkage()) {
   1026       if (isa<GlobalVariable>(V)) {
   1027         const GlobalVariable *GVar = cast<GlobalVariable>(V);
   1028         if (GVar) {
   1029           if (GVar->hasInitializer())
   1030             O << ".visible ";
   1031           else
   1032             O << ".extern ";
   1033         }
   1034       } else if (V->isDeclaration())
   1035         O << ".extern ";
   1036       else
   1037         O << ".visible ";
   1038     } else if (V->hasAppendingLinkage()) {
   1039       std::string msg;
   1040       msg.append("Error: ");
   1041       msg.append("Symbol ");
   1042       if (V->hasName())
   1043         msg.append(V->getName().str());
   1044       msg.append("has unsupported appending linkage type");
   1045       llvm_unreachable(msg.c_str());
   1046     }
   1047   }
   1048 }
   1049 
   1050 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   1051                                          raw_ostream &O,
   1052                                          bool processDemoted) {
          // Print the PTX text for one module-level global variable to O.
          //
          // Depending on the variable this writes nothing (metadata, selected
          // private globals), a texture/surface/sampler reference, an extern
          // declaration, a "has been demoted" comment, or a full definition of
          // the form ".<space> .align N .<type> name[...] [= init];".
          //
          // GVar           - the variable to print.
          // O              - destination stream.
          // processDemoted - when false, a variable for which
          //                  canDemoteGlobalVar() succeeds is only recorded in
          //                  localDecls for the owning function and not defined
          //                  here; emitDemotedVars() passes true to print the
          //                  real definition.
   1053 
   1054   // Skip meta data
   1055   if (GVar->hasSection()) {
   1056     if (GVar->getSection() == "llvm.metadata")
   1057       return;
   1058   }
   1059 
   1060   const DataLayout *TD = TM.getDataLayout();
   1061 
   1062   // GlobalVariables are always constant pointers themselves.
   1063   const PointerType *PTy = GVar->getType();
   1064   Type *ETy = PTy->getElementType();
   1065 
          // Linkage prefix: externally visible variables are ".visible" when they
          // carry an initializer and ".extern" otherwise. This is written before
          // any of the special cases below.
   1066   if (GVar->hasExternalLinkage()) {
   1067     if (GVar->hasInitializer())
   1068       O << ".visible ";
   1069     else
   1070       O << ".extern ";
   1071   }
   1072 
   1073   if (llvm::isTexture(*GVar)) {
   1074     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
   1075     return;
   1076   }
   1077 
   1078   if (llvm::isSurface(*GVar)) {
   1079     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
   1080     return;
   1081   }
   1082 
   1083   if (GVar->isDeclaration()) {
   1084     // (extern) declarations, no definition or initializer
   1085     // Currently the only known declaration is for an automatic __local
   1086     // (.shared) promoted to global.
   1087     emitPTXGlobalVariable(GVar, O);
   1088     O << ";\n";
   1089     return;
   1090   }
   1091 
   1092   if (llvm::isSampler(*GVar)) {
   1093     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
   1094 
            // The sampler state is only printed when the initializer is a
            // ConstantInt; its bits are decoded with the __CLK_* masks.
   1095     const Constant *Initializer = NULL;
   1096     if (GVar->hasInitializer())
   1097       Initializer = GVar->getInitializer();
   1098     const ConstantInt *CI = NULL;
   1099     if (Initializer)
   1100       CI = dyn_cast<ConstantInt>(Initializer);
   1101     if (CI) {
   1102       unsigned sample = CI->getZExtValue();
   1103 
   1104       O << " = { ";
   1105 
              // addr is computed once, so addr_mode_0..2 all get the same value.
   1106       for (int i = 0,
   1107                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
   1108            i < 3; i++) {
   1109         O << "addr_mode_" << i << " = ";
   1110         switch (addr) {
   1111         case 0:
   1112           O << "wrap";
   1113           break;
   1114         case 1:
   1115           O << "clamp_to_border";
   1116           break;
   1117         case 2:
   1118           O << "clamp_to_edge";
   1119           break;
   1120         case 3:
   1121           O << "wrap";
   1122           break;
   1123         case 4:
   1124           O << "mirror";
   1125           break;
   1126         }
   1127         O << ", ";
   1128       }
   1129       O << "filter_mode = ";
   1130       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
   1131       case 0:
   1132         O << "nearest";
   1133         break;
   1134       case 1:
   1135         O << "linear";
   1136         break;
   1137       case 2:
   1138         assert(0 && "Anisotropic filtering is not supported");
                // No break: when the assert is compiled out this falls through
                // and prints "nearest".
   1139       default:
   1140         O << "nearest";
   1141         break;
   1142       }
   1143       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
   1144         O << ", force_unnormalized_coords = 1";
   1145       }
   1146       O << " }";
   1147     }
   1148 
   1149     O << ";\n";
   1150     return;
   1151   }
   1152 
          // Private globals whose name starts with "unrollpragma" or "filename",
          // and private globals without any use, are not printed at all.
   1153   if (GVar->hasPrivateLinkage()) {
   1154 
   1155     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
   1156       return;
   1157 
   1158     // FIXME - need better way (e.g. Metadata) to avoid generating this global
   1159     if (!strncmp(GVar->getName().data(), "filename", 8))
   1160       return;
   1161     if (GVar->use_empty())
   1162       return;
   1163   }
   1164 
          // Demotion: leave a comment here and remember the variable in
          // localDecls under the function reported by canDemoteGlobalVar().
   1165   const Function *demotedFunc = 0;
   1166   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
   1167     O << "// " << GVar->getName().str() << " has been demoted\n";
   1168     if (localDecls.find(demotedFunc) != localDecls.end())
   1169       localDecls[demotedFunc].push_back(GVar);
   1170     else {
   1171       std::vector<const GlobalVariable *> temp;
   1172       temp.push_back(GVar);
   1173       localDecls[demotedFunc] = temp;
   1174     }
   1175     return;
   1176   }
   1177 
          // Regular definition: state space, then alignment (the explicit
          // alignment if set, otherwise the preferred alignment of the type).
   1178   O << ".";
   1179   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1180   if (GVar->getAlignment() == 0)
   1181     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1182   else
   1183     O << " .align " << GVar->getAlignment();
   1184 
   1185   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
   1186     O << " .";
   1187     // Special case: ABI requires that we use .u8 for predicates
   1188     if (ETy->isIntegerTy(1))
   1189       O << "u8";
   1190     else
   1191       O << getPTXFundamentalTypeStr(ETy, false);
   1192     O << " ";
   1193     O << *Mang->getSymbol(GVar);
   1194 
   1195     // Ptx allows variable initialization only for constant and global state
   1196     // spaces.
   1197     if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1198          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
   1199         GVar->hasInitializer()) {
   1200       const Constant *Initializer = GVar->getInitializer();
              // A null-valued initializer is not printed.
   1201       if (!Initializer->isNullValue()) {
   1202         O << " = ";
   1203         printScalarConstant(Initializer, O);
   1204       }
   1205     }
   1206   } else {
   1207     unsigned int ElementSize = 0;
   1208 
   1209     // Although PTX has direct support for struct type and array type and
   1210     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
   1211     // targets that support these high level field accesses. Structs, arrays
   1212     // and vectors are lowered into arrays of bytes.
   1213     switch (ETy->getTypeID()) {
   1214     case Type::StructTyID:
   1215     case Type::ArrayTyID:
   1216     case Type::VectorTyID:
   1217       ElementSize = TD->getTypeStoreSize(ETy);
   1218       // Ptx allows variable initialization only for constant and
   1219       // global state spaces.
   1220       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1221            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
   1222           GVar->hasInitializer()) {
   1223         const Constant *Initializer = GVar->getInitializer();
   1224         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
   1225           AggBuffer aggBuffer(ElementSize, O, *this);
   1226           bufferAggregateConstant(Initializer, &aggBuffer);
                  // When the buffer reports symbols (numSymbols != 0) the array
                  // is declared with pointer-sized elements (.u64 or .u32) and
                  // the byte size is divided by 8 or 4; otherwise it is a plain
                  // .b8 byte array.
   1227           if (aggBuffer.numSymbols) {
   1228             if (nvptxSubtarget.is64Bit()) {
   1229               O << " .u64 " << *Mang->getSymbol(GVar) << "[";
   1230               O << ElementSize / 8;
   1231             } else {
   1232               O << " .u32 " << *Mang->getSymbol(GVar) << "[";
   1233               O << ElementSize / 4;
   1234             }
   1235             O << "]";
   1236           } else {
   1237             O << " .b8 " << *Mang->getSymbol(GVar) << "[";
   1238             O << ElementSize;
   1239             O << "]";
   1240           }
   1241           O << " = {";
   1242           aggBuffer.print();
   1243           O << "}";
   1244         } else {
                  // Undef or all-zero initializer: declare the bytes only.
   1245           O << " .b8 " << *Mang->getSymbol(GVar);
   1246           if (ElementSize) {
   1247             O << "[";
   1248             O << ElementSize;
   1249             O << "]";
   1250           }
   1251         }
   1252       } else {
                // No initializer, or a state space other than global/const.
   1253         O << " .b8 " << *Mang->getSymbol(GVar);
   1254         if (ElementSize) {
   1255           O << "[";
   1256           O << ElementSize;
   1257           O << "]";
   1258         }
   1259       }
   1260       break;
   1261     default:
   1262       assert(0 && "type not supported yet");
   1263     }
   1264 
   1265   }
   1266   O << ";\n";
   1267 }
   1268 
   1269 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
   1270   if (localDecls.find(f) == localDecls.end())
   1271     return;
   1272 
   1273   std::vector<const GlobalVariable *> &gvars = localDecls[f];
   1274 
   1275   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
   1276     O << "\t// demoted variable\n\t";
   1277     printModuleLevelGV(gvars[i], O, true);
   1278   }
   1279 }
   1280 
   1281 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   1282                                           raw_ostream &O) const {
   1283   switch (AddressSpace) {
   1284   case llvm::ADDRESS_SPACE_LOCAL:
   1285     O << "local";
   1286     break;
   1287   case llvm::ADDRESS_SPACE_GLOBAL:
   1288     O << "global";
   1289     break;
   1290   case llvm::ADDRESS_SPACE_CONST:
   1291     O << "const";
   1292     break;
   1293   case llvm::ADDRESS_SPACE_SHARED:
   1294     O << "shared";
   1295     break;
   1296   default:
   1297     report_fatal_error("Bad address space found while emitting PTX");
   1298     break;
   1299   }
   1300 }
   1301 
   1302 std::string
   1303 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   1304   switch (Ty->getTypeID()) {
   1305   default:
   1306     llvm_unreachable("unexpected type");
   1307     break;
   1308   case Type::IntegerTyID: {
   1309     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
   1310     if (NumBits == 1)
   1311       return "pred";
   1312     else if (NumBits <= 64) {
   1313       std::string name = "u";
   1314       return name + utostr(NumBits);
   1315     } else {
   1316       llvm_unreachable("Integer too large");
   1317       break;
   1318     }
   1319     break;
   1320   }
   1321   case Type::FloatTyID:
   1322     return "f32";
   1323   case Type::DoubleTyID:
   1324     return "f64";
   1325   case Type::PointerTyID:
   1326     if (nvptxSubtarget.is64Bit())
   1327       if (useB4PTR)
   1328         return "b64";
   1329       else
   1330         return "u64";
   1331     else if (useB4PTR)
   1332       return "b32";
   1333     else
   1334       return "u32";
   1335   }
   1336   llvm_unreachable("unexpected type");
   1337   return NULL;
   1338 }
   1339 
   1340 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
   1341                                             raw_ostream &O) {
   1342 
   1343   const DataLayout *TD = TM.getDataLayout();
   1344 
   1345   // GlobalVariables are always constant pointers themselves.
   1346   const PointerType *PTy = GVar->getType();
   1347   Type *ETy = PTy->getElementType();
   1348 
   1349   O << ".";
   1350   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1351   if (GVar->getAlignment() == 0)
   1352     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1353   else
   1354     O << " .align " << GVar->getAlignment();
   1355 
   1356   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
   1357     O << " .";
   1358     O << getPTXFundamentalTypeStr(ETy);
   1359     O << " ";
   1360     O << *Mang->getSymbol(GVar);
   1361     return;
   1362   }
   1363 
   1364   int64_t ElementSize = 0;
   1365 
   1366   // Although PTX has direct support for struct type and array type and LLVM IR
   1367   // is very similar to PTX, the LLVM CodeGen does not support for targets that
   1368   // support these high level field accesses. Structs and arrays are lowered
   1369   // into arrays of bytes.
   1370   switch (ETy->getTypeID()) {
   1371   case Type::StructTyID:
   1372   case Type::ArrayTyID:
   1373   case Type::VectorTyID:
   1374     ElementSize = TD->getTypeStoreSize(ETy);
   1375     O << " .b8 " << *Mang->getSymbol(GVar) << "[";
   1376     if (ElementSize) {
   1377       O << itostr(ElementSize);
   1378     }
   1379     O << "]";
   1380     break;
   1381   default:
   1382     assert(0 && "type not supported yet");
   1383   }
   1384   return;
   1385 }
   1386 
   1387 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
   1388   if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
   1389     return TD->getPrefTypeAlignment(Ty);
   1390 
   1391   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
   1392   if (ATy)
   1393     return getOpenCLAlignment(TD, ATy->getElementType());
   1394 
   1395   const VectorType *VTy = dyn_cast<VectorType>(Ty);
   1396   if (VTy) {
   1397     Type *ETy = VTy->getElementType();
   1398     unsigned int numE = VTy->getNumElements();
   1399     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
   1400     if (numE == 3)
   1401       return 4 * alignE;
   1402     else
   1403       return numE * alignE;
   1404   }
   1405 
   1406   const StructType *STy = dyn_cast<StructType>(Ty);
   1407   if (STy) {
   1408     unsigned int alignStruct = 1;
   1409     // Go through each element of the struct and find the
   1410     // largest alignment.
   1411     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
   1412       Type *ETy = STy->getElementType(i);
   1413       unsigned int align = getOpenCLAlignment(TD, ETy);
   1414       if (align > alignStruct)
   1415         alignStruct = align;
   1416     }
   1417     return alignStruct;
   1418   }
   1419 
   1420   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
   1421   if (FTy)
   1422     return TD->getPointerPrefAlignment();
   1423   return TD->getPrefTypeAlignment(Ty);
   1424 }
   1425 
   1426 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
   1427                                      int paramIndex, raw_ostream &O) {
   1428   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1429       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
   1430     O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
   1431   else {
   1432     std::string argName = I->getName();
   1433     const char *p = argName.c_str();
   1434     while (*p) {
   1435       if (*p == '.')
   1436         O << "_";
   1437       else
   1438         O << *p;
   1439       p++;
   1440     }
   1441   }
   1442 }
   1443 
   1444 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   1445   Function::const_arg_iterator I, E;
   1446   int i = 0;
   1447 
   1448   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1449       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
   1450     O << *CurrentFnSym << "_param_" << paramIndex;
   1451     return;
   1452   }
   1453 
   1454   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
   1455     if (i == paramIndex) {
   1456       printParamName(I, paramIndex, O);
   1457       return;
   1458     }
   1459   }
   1460   llvm_unreachable("paramIndex out of bound");
   1461 }
   1462 
   1463 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
          // Print the parenthesized PTX parameter list of F to O.
          //
          // The output starts with "(\n", ends with "\n)\n", and consecutive
          // entries are separated by ",\n". How each argument is printed depends
          // on whether it is an image/sampler, whether it carries the byval
          // attribute, whether F is a kernel, and whether the ABI form is in use
          // (isABI, i.e. SM version >= 20).
   1464   const DataLayout *TD = TM.getDataLayout();
   1465   const AttributeSet &PAL = F->getAttributes();
   1466   const TargetLowering *TLI = TM.getTargetLowering();
   1467   Function::const_arg_iterator I, E;
   1468   unsigned paramIndex = 0;
   1469   bool first = true;
   1470   bool isKernelFunc = llvm::isKernelFunction(*F);
   1471   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
   1472   MVT thePointerTy = TLI->getPointerTy();
   1473 
   1474   O << "(\n";
   1475 
   1476   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
   1477     Type *Ty = I->getType();
   1478 
   1479     if (!first)
   1480       O << ",\n";
   1481 
   1482     first = false;
   1483 
   1484     // Handle image/sampler parameters
   1485     if (llvm::isSampler(*I) || llvm::isImage(*I)) {
   1486       if (llvm::isImage(*I)) {
                // NOTE(review): sname is not referenced anywhere else in this
                // function.
   1487         std::string sname = I->getName();
   1488         if (llvm::isImageWriteOnly(*I))
   1489           O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
   1490             << paramIndex;
   1491         else // Default image is read_only
   1492           O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
   1493             << paramIndex;
   1494       } else // Should be llvm::isSampler(*I)
   1495         O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
   1496           << paramIndex;
   1497       continue;
   1498     }
   1499 
            // Attribute indices for arguments are 1-based, hence paramIndex + 1.
   1500     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
   1501       if (Ty->isVectorTy()) {
   1502         // Just print .param .b8 .align <a> .param[size];
   1503         // <a> = PAL.getparamalignment
   1504         // size = typeallocsize of element type
   1505         unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1506         if (align == 0)
   1507           align = TD->getABITypeAlignment(Ty);
   1508 
   1509         unsigned sz = TD->getTypeAllocSize(Ty);
   1510         O << "\t.param .align " << align << " .b8 ";
   1511         printParamName(I, paramIndex, O);
   1512         O << "[" << sz << "]";
   1513 
   1514         continue;
   1515       }
   1516       // Just a scalar
   1517       const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1518       if (isKernelFunc) {
   1519         if (PTy) {
   1520           // Special handling for pointer arguments to kernel
   1521           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
   1522 
                  // For every driver interface except CUDA the pointer is
                  // additionally annotated with ".ptr", the pointee's state
                  // space (when it is const, shared or global) and the
                  // alignment computed by getOpenCLAlignment().
   1523           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
   1524             Type *ETy = PTy->getElementType();
   1525             int addrSpace = PTy->getAddressSpace();
   1526             switch (addrSpace) {
   1527             default:
   1528               O << ".ptr ";
   1529               break;
   1530             case llvm::ADDRESS_SPACE_CONST:
   1531               O << ".ptr .const ";
   1532               break;
   1533             case llvm::ADDRESS_SPACE_SHARED:
   1534               O << ".ptr .shared ";
   1535               break;
   1536             case llvm::ADDRESS_SPACE_GLOBAL:
   1537               O << ".ptr .global ";
   1538               break;
   1539             }
   1540             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
   1541           }
   1542           printParamName(I, paramIndex, O);
   1543           continue;
   1544         }
   1545 
   1546         // non-pointer scalar to kernel func
   1547         O << "\t.param .";
   1548         // Special case: predicate operands become .u8 types
   1549         if (Ty->isIntegerTy(1))
   1550           O << "u8";
   1551         else
   1552           O << getPTXFundamentalTypeStr(Ty);
   1553         O << " ";
   1554         printParamName(I, paramIndex, O);
   1555         continue;
   1556       }
   1557       // Non-kernel function, just print .param .b<size> for ABI
   1558       // and .reg .b<size> for non-ABI
   1559       unsigned sz = 0;
   1560       if (isa<IntegerType>(Ty)) {
   1561         sz = cast<IntegerType>(Ty)->getBitWidth();
                // Integers narrower than 32 bits are passed as 32-bit values.
   1562         if (sz < 32)
   1563           sz = 32;
   1564       } else if (isa<PointerType>(Ty))
   1565         sz = thePointerTy.getSizeInBits();
   1566       else
   1567         sz = Ty->getPrimitiveSizeInBits();
   1568       if (isABI)
   1569         O << "\t.param .b" << sz << " ";
   1570       else
   1571         O << "\t.reg .b" << sz << " ";
   1572       printParamName(I, paramIndex, O);
   1573       continue;
   1574     }
   1575 
   1576     // param has byVal attribute. So should be a pointer
   1577     const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1578     assert(PTy && "Param with byval attribute should be a pointer type");
   1579     Type *ETy = PTy->getElementType();
   1580 
   1581     if (isABI || isKernelFunc) {
   1582       // Just print .param .b8 .align <a> .param[size];
   1583       // <a> = PAL.getparamalignment
   1584       // size = typeallocsize of element type
   1585       unsigned align = PAL.getParamAlignment(paramIndex + 1);
   1586       if (align == 0)
   1587         align = TD->getABITypeAlignment(ETy);
   1588 
   1589       unsigned sz = TD->getTypeAllocSize(ETy);
   1590       O << "\t.param .align " << align << " .b8 ";
   1591       printParamName(I, paramIndex, O);
   1592       O << "[" << sz << "]";
   1593       continue;
   1594     } else {
   1595       // Split the ETy into constituent parts and
   1596       // print .param .b<size> <name> for each part.
   1597       // Further, if a part is vector, print the above for
   1598       // each vector element.
              // NOTE(review): paramIndex is advanced once per printed part, and
              // the same counter is used above as the attribute index
              // (paramIndex + 1) for the arguments that follow -- confirm that
              // this is intended.
   1599       SmallVector<EVT, 16> vtparts;
   1600       ComputeValueVTs(*TLI, ETy, vtparts);
   1601       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
   1602         unsigned elems = 1;
   1603         EVT elemtype = vtparts[i];
   1604         if (vtparts[i].isVector()) {
   1605           elems = vtparts[i].getVectorNumElements();
   1606           elemtype = vtparts[i].getVectorElementType();
   1607         }
   1608 
   1609         for (unsigned j = 0, je = elems; j != je; ++j) {
   1610           unsigned sz = elemtype.getSizeInBits();
   1611           if (elemtype.isInteger() && (sz < 32))
   1612             sz = 32;
   1613           O << "\t.reg .b" << sz << " ";
   1614           printParamName(I, paramIndex, O);
   1615           if (j < je - 1)
   1616             O << ",\n";
   1617           ++paramIndex;
   1618         }
   1619         if (i < e - 1)
   1620           O << ",\n";
   1621       }
              // Undo one increment: the enclosing for loop increments paramIndex
              // again at the end of this iteration.
   1622       --paramIndex;
   1623       continue;
   1624     }
   1625   }
   1626 
   1627   O << "\n)\n";
   1628 }
   1629 
   1630 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   1631                                             raw_ostream &O) {
   1632   const Function *F = MF.getFunction();
   1633   emitFunctionParamList(F, O);
   1634 }
   1635 
   1636 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
   1637     const MachineFunction &MF) {
   1638   SmallString<128> Str;
   1639   raw_svector_ostream O(Str);
   1640 
   1641   // Map the global virtual register number to a register class specific
   1642   // virtual register number starting from 1 with that class.
   1643   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
   1644   //unsigned numRegClasses = TRI->getNumRegClasses();
   1645 
   1646   // Emit the Fake Stack Object
   1647   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1648   int NumBytes = (int) MFI->getStackSize();
   1649   if (NumBytes) {
   1650     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
   1651       << getFunctionNumber() << "[" << NumBytes << "];\n";
   1652     if (nvptxSubtarget.is64Bit()) {
   1653       O << "\t.reg .b64 \t%SP;\n";
   1654       O << "\t.reg .b64 \t%SPL;\n";
   1655     } else {
   1656       O << "\t.reg .b32 \t%SP;\n";
   1657       O << "\t.reg .b32 \t%SPL;\n";
   1658     }
   1659   }
   1660 
   1661   // Go through all virtual registers to establish the mapping between the
   1662   // global virtual
   1663   // register number and the per class virtual register number.
   1664   // We use the per class virtual register number in the ptx output.
   1665   unsigned int numVRs = MRI->getNumVirtRegs();
   1666   for (unsigned i = 0; i < numVRs; i++) {
   1667     unsigned int vr = TRI->index2VirtReg(i);
   1668     const TargetRegisterClass *RC = MRI->getRegClass(vr);
   1669     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   1670     int n = regmap.size();
   1671     regmap.insert(std::make_pair(vr, n + 1));
   1672   }
   1673 
   1674   // Emit register declarations
   1675   // @TODO: Extract out the real register usage
   1676   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
   1677   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
   1678   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
   1679   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
   1680   // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
   1681   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
   1682   // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
   1683 
   1684   // Emit declaration of the virtual registers or 'physical' registers for
   1685   // each register class
   1686   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
   1687     const TargetRegisterClass *RC = TRI->getRegClass(i);
   1688     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
   1689     std::string rcname = getNVPTXRegClassName(RC);
   1690     std::string rcStr = getNVPTXRegClassStr(RC);
   1691     int n = regmap.size();
   1692 
   1693     // Only declare those registers that may be used.
   1694     if (n) {
   1695        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
   1696          << ">;\n";
   1697     }
   1698   }
   1699 
   1700   OutStreamer.EmitRawText(O.str());
   1701 }
   1702 
   1703 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
   1704   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
   1705   bool ignored;
   1706   unsigned int numHex;
   1707   const char *lead;
   1708 
   1709   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
   1710     numHex = 8;
   1711     lead = "0f";
   1712     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
   1713   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
   1714     numHex = 16;
   1715     lead = "0d";
   1716     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
   1717   } else
   1718     llvm_unreachable("unsupported fp type");
   1719 
   1720   APInt API = APF.bitcastToAPInt();
   1721   std::string hexstr(utohexstr(API.getZExtValue()));
   1722   O << lead;
   1723   if (hexstr.length() < numHex)
   1724     O << std::string(numHex - hexstr.length(), '0');
   1725   O << utohexstr(API.getZExtValue());
   1726 }
   1727 
   1728 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
   1729   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
   1730     O << CI->getValue();
   1731     return;
   1732   }
   1733   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
   1734     printFPConstant(CFP, O);
   1735     return;
   1736   }
   1737   if (isa<ConstantPointerNull>(CPV)) {
   1738     O << "0";
   1739     return;
   1740   }
   1741   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1742     O << *Mang->getSymbol(GVar);
   1743     return;
   1744   }
   1745   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1746     const Value *v = Cexpr->stripPointerCasts();
   1747     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
   1748       O << *Mang->getSymbol(GVar);
   1749       return;
   1750     } else {
   1751       O << *LowerConstant(CPV, *this);
   1752       return;
   1753     }
   1754   }
   1755   llvm_unreachable("Not scalar type found in printScalarConstant()");
   1756 }
   1757 
   1758 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   1759                                    AggBuffer *aggBuffer) {
   1760 
   1761   const DataLayout *TD = TM.getDataLayout();
   1762 
   1763   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
   1764     int s = TD->getTypeAllocSize(CPV->getType());
   1765     if (s < Bytes)
   1766       s = Bytes;
   1767     aggBuffer->addZeros(s);
   1768     return;
   1769   }
   1770 
   1771   unsigned char *ptr;
   1772   switch (CPV->getType()->getTypeID()) {
   1773 
   1774   case Type::IntegerTyID: {
   1775     const Type *ETy = CPV->getType();
   1776     if (ETy == Type::getInt8Ty(CPV->getContext())) {
   1777       unsigned char c =
   1778           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   1779       ptr = &c;
   1780       aggBuffer->addBytes(ptr, 1, Bytes);
   1781     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
   1782       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   1783       ptr = (unsigned char *)&int16;
   1784       aggBuffer->addBytes(ptr, 2, Bytes);
   1785     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
   1786       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1787         int int32 = (int)(constInt->getZExtValue());
   1788         ptr = (unsigned char *)&int32;
   1789         aggBuffer->addBytes(ptr, 4, Bytes);
   1790         break;
   1791       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1792         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   1793                 ConstantFoldConstantExpression(Cexpr, TD))) {
   1794           int int32 = (int)(constInt->getZExtValue());
   1795           ptr = (unsigned char *)&int32;
   1796           aggBuffer->addBytes(ptr, 4, Bytes);
   1797           break;
   1798         }
   1799         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1800           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1801           aggBuffer->addSymbol(v);
   1802           aggBuffer->addZeros(4);
   1803           break;
   1804         }
   1805       }
   1806       llvm_unreachable("unsupported integer const type");
   1807     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
   1808       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1809         long long int64 = (long long)(constInt->getZExtValue());
   1810         ptr = (unsigned char *)&int64;
   1811         aggBuffer->addBytes(ptr, 8, Bytes);
   1812         break;
   1813       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1814         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
   1815                 ConstantFoldConstantExpression(Cexpr, TD))) {
   1816           long long int64 = (long long)(constInt->getZExtValue());
   1817           ptr = (unsigned char *)&int64;
   1818           aggBuffer->addBytes(ptr, 8, Bytes);
   1819           break;
   1820         }
   1821         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1822           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1823           aggBuffer->addSymbol(v);
   1824           aggBuffer->addZeros(8);
   1825           break;
   1826         }
   1827       }
   1828       llvm_unreachable("unsupported integer const type");
   1829     } else
   1830       llvm_unreachable("unsupported integer const type");
   1831     break;
   1832   }
   1833   case Type::FloatTyID:
   1834   case Type::DoubleTyID: {
   1835     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
   1836     const Type *Ty = CFP->getType();
   1837     if (Ty == Type::getFloatTy(CPV->getContext())) {
   1838       float float32 = (float) CFP->getValueAPF().convertToFloat();
   1839       ptr = (unsigned char *)&float32;
   1840       aggBuffer->addBytes(ptr, 4, Bytes);
   1841     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
   1842       double float64 = CFP->getValueAPF().convertToDouble();
   1843       ptr = (unsigned char *)&float64;
   1844       aggBuffer->addBytes(ptr, 8, Bytes);
   1845     } else {
   1846       llvm_unreachable("unsupported fp const type");
   1847     }
   1848     break;
   1849   }
   1850   case Type::PointerTyID: {
   1851     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1852       aggBuffer->addSymbol(GVar);
   1853     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1854       const Value *v = Cexpr->stripPointerCasts();
   1855       aggBuffer->addSymbol(v);
   1856     }
   1857     unsigned int s = TD->getTypeAllocSize(CPV->getType());
   1858     aggBuffer->addZeros(s);
   1859     break;
   1860   }
   1861 
   1862   case Type::ArrayTyID:
   1863   case Type::VectorTyID:
   1864   case Type::StructTyID: {
   1865     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
   1866         isa<ConstantStruct>(CPV)) {
   1867       int ElementSize = TD->getTypeAllocSize(CPV->getType());
   1868       bufferAggregateConstant(CPV, aggBuffer);
   1869       if (Bytes > ElementSize)
   1870         aggBuffer->addZeros(Bytes - ElementSize);
   1871     } else if (isa<ConstantAggregateZero>(CPV))
   1872       aggBuffer->addZeros(Bytes);
   1873     else
   1874       llvm_unreachable("Unexpected Constant type");
   1875     break;
   1876   }
   1877 
   1878   default:
   1879     llvm_unreachable("unsupported type");
   1880   }
   1881 }
   1882 
   1883 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
   1884                                               AggBuffer *aggBuffer) {
   1885   const DataLayout *TD = TM.getDataLayout();
   1886   int Bytes;
   1887 
   1888   // Old constants
   1889   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
   1890     if (CPV->getNumOperands())
   1891       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
   1892         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
   1893     return;
   1894   }
   1895 
   1896   if (const ConstantDataSequential *CDS =
   1897           dyn_cast<ConstantDataSequential>(CPV)) {
   1898     if (CDS->getNumElements())
   1899       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
   1900         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
   1901                      aggBuffer);
   1902     return;
   1903   }
   1904 
   1905   if (isa<ConstantStruct>(CPV)) {
   1906     if (CPV->getNumOperands()) {
   1907       StructType *ST = cast<StructType>(CPV->getType());
   1908       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
   1909         if (i == (e - 1))
   1910           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
   1911                   TD->getTypeAllocSize(ST) -
   1912                   TD->getStructLayout(ST)->getElementOffset(i);
   1913         else
   1914           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
   1915                   TD->getStructLayout(ST)->getElementOffset(i);
   1916         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
   1917       }
   1918     }
   1919     return;
   1920   }
   1921   llvm_unreachable("unsupported constant type in printAggregateConstant()");
   1922 }
   1923 
   1924 // buildTypeNameMap - Run through symbol table looking for type names.
   1925 //
   1926 
   1927 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
   1928 
   1929   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
   1930 
   1931   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
   1932                                   !PI->second.compare("struct._image2d_t") ||
   1933                                   !PI->second.compare("struct._image3d_t")))
   1934     return true;
   1935 
   1936   return false;
   1937 }
   1938 
   1939 
   1940 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   1941   switch (MI.getOpcode()) {
   1942   default:
   1943     return false;
   1944   case NVPTX::CallArgBeginInst:
   1945   case NVPTX::CallArgEndInst0:
   1946   case NVPTX::CallArgEndInst1:
   1947   case NVPTX::CallArgF32:
   1948   case NVPTX::CallArgF64:
   1949   case NVPTX::CallArgI16:
   1950   case NVPTX::CallArgI32:
   1951   case NVPTX::CallArgI32imm:
   1952   case NVPTX::CallArgI64:
   1953   case NVPTX::CallArgParam:
   1954   case NVPTX::CallVoidInst:
   1955   case NVPTX::CallVoidInstReg:
   1956   case NVPTX::Callseq_End:
   1957   case NVPTX::CallVoidInstReg64:
   1958   case NVPTX::DeclareParamInst:
   1959   case NVPTX::DeclareRetMemInst:
   1960   case NVPTX::DeclareRetRegInst:
   1961   case NVPTX::DeclareRetScalarInst:
   1962   case NVPTX::DeclareScalarParamInst:
   1963   case NVPTX::DeclareScalarRegInst:
   1964   case NVPTX::StoreParamF32:
   1965   case NVPTX::StoreParamF64:
   1966   case NVPTX::StoreParamI16:
   1967   case NVPTX::StoreParamI32:
   1968   case NVPTX::StoreParamI64:
   1969   case NVPTX::StoreParamI8:
   1970   case NVPTX::StoreRetvalF32:
   1971   case NVPTX::StoreRetvalF64:
   1972   case NVPTX::StoreRetvalI16:
   1973   case NVPTX::StoreRetvalI32:
   1974   case NVPTX::StoreRetvalI64:
   1975   case NVPTX::StoreRetvalI8:
   1976   case NVPTX::LastCallArgF32:
   1977   case NVPTX::LastCallArgF64:
   1978   case NVPTX::LastCallArgI16:
   1979   case NVPTX::LastCallArgI32:
   1980   case NVPTX::LastCallArgI32imm:
   1981   case NVPTX::LastCallArgI64:
   1982   case NVPTX::LastCallArgParam:
   1983   case NVPTX::LoadParamMemF32:
   1984   case NVPTX::LoadParamMemF64:
   1985   case NVPTX::LoadParamMemI16:
   1986   case NVPTX::LoadParamMemI32:
   1987   case NVPTX::LoadParamMemI64:
   1988   case NVPTX::LoadParamMemI8:
   1989   case NVPTX::PrototypeInst:
   1990   case NVPTX::DBG_VALUE:
   1991     return true;
   1992   }
   1993   return false;
   1994 }
   1995 
   1996 // Force static initialization.
   1997 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
   1998   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   1999   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2000 }
   2001 
   2002 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   2003   std::stringstream temp;
   2004   LineReader *reader = this->getReader(filename.str());
   2005   temp << "\n//";
   2006   temp << filename.str();
   2007   temp << ":";
   2008   temp << line;
   2009   temp << " ";
   2010   temp << reader->readLine(line);
   2011   temp << "\n";
   2012   this->OutStreamer.EmitRawText(Twine(temp.str()));
   2013 }
   2014 
   2015 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
   2016   if (reader == NULL) {
   2017     reader = new LineReader(filename);
   2018   }
   2019 
   2020   if (reader->fileName() != filename) {
   2021     delete reader;
   2022     reader = new LineReader(filename);
   2023   }
   2024 
   2025   return reader;
   2026 }
   2027 
   2028 std::string LineReader::readLine(unsigned lineNum) {
   2029   if (lineNum < theCurLine) {
   2030     theCurLine = 0;
   2031     fstr.seekg(0, std::ios::beg);
   2032   }
   2033   while (theCurLine < lineNum) {
   2034     fstr.getline(buff, 500);
   2035     theCurLine++;
   2036   }
   2037   return buff;
   2038 }
   2039 
   2040 // Force static initialization.
   2041 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
   2042   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2043   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2044 }
   2045