Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a printer that converts from our internal representation
     11 // of machine-dependent LLVM code to NVPTX assembly language.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "NVPTXAsmPrinter.h"
     16 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
     17 #include "NVPTX.h"
     18 #include "NVPTXInstrInfo.h"
     19 #include "NVPTXNumRegisters.h"
     20 #include "NVPTXRegisterInfo.h"
     21 #include "NVPTXTargetMachine.h"
     22 #include "NVPTXUtilities.h"
     23 #include "cl_common_defines.h"
     24 #include "llvm/ADT/StringExtras.h"
     25 #include "llvm/Analysis/ConstantFolding.h"
     26 #include "llvm/Assembly/Writer.h"
     27 #include "llvm/CodeGen/Analysis.h"
     28 #include "llvm/CodeGen/MachineFrameInfo.h"
     29 #include "llvm/CodeGen/MachineModuleInfo.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/DebugInfo.h"
     32 #include "llvm/IR/DerivedTypes.h"
     33 #include "llvm/IR/Function.h"
     34 #include "llvm/IR/GlobalVariable.h"
     35 #include "llvm/IR/Module.h"
     36 #include "llvm/IR/Operator.h"
     37 #include "llvm/MC/MCStreamer.h"
     38 #include "llvm/MC/MCSymbol.h"
     39 #include "llvm/Support/CommandLine.h"
     40 #include "llvm/Support/ErrorHandling.h"
     41 #include "llvm/Support/FormattedStream.h"
     42 #include "llvm/Support/Path.h"
     43 #include "llvm/Support/TargetRegistry.h"
     44 #include "llvm/Support/TimeValue.h"
     45 #include "llvm/Target/Mangler.h"
     46 #include "llvm/Target/TargetLoweringObjectFile.h"
     47 #include <sstream>
     48 using namespace llvm;
     49 
     50 
     51 #include "NVPTXGenAsmWriter.inc"
     52 
     53 bool RegAllocNilUsed = true;
     54 
     55 #define DEPOTNAME "__local_depot"
     56 
     57 static cl::opt<bool>
     58 EmitLineNumbers("nvptx-emit-line-numbers",
     59                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
     60                 cl::init(true));
     61 
     62 namespace llvm  {
     63 bool InterleaveSrcInPtx = false;
     64 }
     65 
     66 static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
     67                                         cl::ZeroOrMore,
     68                        cl::desc("NVPTX Specific: Emit source line in ptx file"),
     69                                         cl::location(llvm::InterleaveSrcInPtx));
     70 
     71 
     72 namespace {
     73 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
     74 /// depends.
     75 void DiscoverDependentGlobals(Value *V,
     76                               DenseSet<GlobalVariable*> &Globals) {
     77   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     78     Globals.insert(GV);
     79   else {
     80     if (User *U = dyn_cast<User>(V)) {
     81       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
     82         DiscoverDependentGlobals(U->getOperand(i), Globals);
     83       }
     84     }
     85   }
     86 }
     87 
     88 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
     89 /// instances to be emitted, but only after any dependents have been added
     90 /// first.
     91 void VisitGlobalVariableForEmission(GlobalVariable *GV,
     92                                     SmallVectorImpl<GlobalVariable*> &Order,
     93                                     DenseSet<GlobalVariable*> &Visited,
     94                                     DenseSet<GlobalVariable*> &Visiting) {
     95   // Have we already visited this one?
     96   if (Visited.count(GV)) return;
     97 
     98   // Do we have a circular dependency?
     99   if (Visiting.count(GV))
    100     report_fatal_error("Circular dependency found in global variable set");
    101 
    102   // Start visiting this global
    103   Visiting.insert(GV);
    104 
    105   // Make sure we visit all dependents first
    106   DenseSet<GlobalVariable*> Others;
    107   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    108     DiscoverDependentGlobals(GV->getOperand(i), Others);
    109 
    110   for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
    111        E = Others.end(); I != E; ++I)
    112     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
    113 
    114   // Now we can visit ourself
    115   Order.push_back(GV);
    116   Visited.insert(GV);
    117   Visiting.erase(GV);
    118 }
    119 }
    120 
    121 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
    122 // cannot just link to the existing version.
    123 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
    124 ///
    125 using namespace nvptx;
    126 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
    127   MCContext &Ctx = AP.OutContext;
    128 
    129   if (CV->isNullValue() || isa<UndefValue>(CV))
    130     return MCConstantExpr::Create(0, Ctx);
    131 
    132   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    133     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
    134 
    135   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
    136     return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
    137 
    138   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
    139     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
    140 
    141   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
    142   if (CE == 0)
    143     llvm_unreachable("Unknown constant value to lower!");
    144 
    145 
    146   switch (CE->getOpcode()) {
    147   default:
    148     // If the code isn't optimized, there may be outstanding folding
    149     // opportunities. Attempt to fold the expression using DataLayout as a
    150     // last resort before giving up.
    151     if (Constant *C =
    152         ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
    153       if (C != CE)
    154         return LowerConstant(C, AP);
    155 
    156     // Otherwise report the problem to the user.
    157     {
    158         std::string S;
    159         raw_string_ostream OS(S);
    160         OS << "Unsupported expression in static initializer: ";
    161         WriteAsOperand(OS, CE, /*PrintType=*/false,
    162                        !AP.MF ? 0 : AP.MF->getFunction()->getParent());
    163         report_fatal_error(OS.str());
    164     }
    165   case Instruction::GetElementPtr: {
    166     const DataLayout &TD = *AP.TM.getDataLayout();
    167     // Generate a symbolic expression for the byte address
    168     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    169     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
    170 
    171     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
    172     if (!OffsetAI)
    173       return Base;
    174 
    175     int64_t Offset = OffsetAI.getSExtValue();
    176     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
    177                                    Ctx);
    178   }
    179 
    180   case Instruction::Trunc:
    181     // We emit the value and depend on the assembler to truncate the generated
    182     // expression properly.  This is important for differences between
    183     // blockaddress labels.  Since the two labels are in the same function, it
    184     // is reasonable to treat their delta as a 32-bit value.
    185     // FALL THROUGH.
    186   case Instruction::BitCast:
    187     return LowerConstant(CE->getOperand(0), AP);
    188 
    189   case Instruction::IntToPtr: {
    190     const DataLayout &TD = *AP.TM.getDataLayout();
    191     // Handle casts to pointers by changing them into casts to the appropriate
    192     // integer type.  This promotes constant folding and simplifies this code.
    193     Constant *Op = CE->getOperand(0);
    194     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
    195                                       false/*ZExt*/);
    196     return LowerConstant(Op, AP);
    197   }
    198 
    199   case Instruction::PtrToInt: {
    200     const DataLayout &TD = *AP.TM.getDataLayout();
    201     // Support only foldable casts to/from pointers that can be eliminated by
    202     // changing the pointer to the appropriately sized integer type.
    203     Constant *Op = CE->getOperand(0);
    204     Type *Ty = CE->getType();
    205 
    206     const MCExpr *OpExpr = LowerConstant(Op, AP);
    207 
    208     // We can emit the pointer value into this slot if the slot is an
    209     // integer slot equal to the size of the pointer.
    210     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
    211       return OpExpr;
    212 
    213     // Otherwise the pointer is smaller than the resultant integer, mask off
    214     // the high bits so we are sure to get a proper truncation if the input is
    215     // a constant expr.
    216     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
    217     const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
    218     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
    219   }
    220 
    221   // The MC library also has a right-shift operator, but it isn't consistently
    222   // signed or unsigned between different targets.
    223   case Instruction::Add:
    224   case Instruction::Sub:
    225   case Instruction::Mul:
    226   case Instruction::SDiv:
    227   case Instruction::SRem:
    228   case Instruction::Shl:
    229   case Instruction::And:
    230   case Instruction::Or:
    231   case Instruction::Xor: {
    232     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
    233     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
    234     switch (CE->getOpcode()) {
    235     default: llvm_unreachable("Unknown binary operator constant cast expr");
    236     case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
    237     case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
    238     case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
    239     case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
    240     case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
    241     case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
    242     case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
    243     case Instruction::Or:  return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
    244     case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
    245     }
    246   }
    247   }
    248 }
    249 
    250 
    251 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
    252 {
    253   if (!EmitLineNumbers)
    254     return;
    255   if (ignoreLoc(MI))
    256     return;
    257 
    258   DebugLoc curLoc = MI.getDebugLoc();
    259 
    260   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
    261     return;
    262 
    263   if (prevDebugLoc == curLoc)
    264     return;
    265 
    266   prevDebugLoc = curLoc;
    267 
    268   if (curLoc.isUnknown())
    269     return;
    270 
    271 
    272   const MachineFunction *MF = MI.getParent()->getParent();
    273   //const TargetMachine &TM = MF->getTarget();
    274 
    275   const LLVMContext &ctx = MF->getFunction()->getContext();
    276   DIScope Scope(curLoc.getScope(ctx));
    277 
    278   if (!Scope.Verify())
    279     return;
    280 
    281   StringRef fileName(Scope.getFilename());
    282   StringRef dirName(Scope.getDirectory());
    283   SmallString<128> FullPathName = dirName;
    284   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    285     sys::path::append(FullPathName, fileName);
    286     fileName = FullPathName.str();
    287   }
    288 
    289   if (filenameMap.find(fileName.str()) == filenameMap.end())
    290     return;
    291 
    292 
    293   // Emit the line from the source file.
    294   if (llvm::InterleaveSrcInPtx)
    295     this->emitSrcInText(fileName.str(), curLoc.getLine());
    296 
    297   std::stringstream temp;
    298   temp << "\t.loc " << filenameMap[fileName.str()]
    299        << " " << curLoc.getLine() << " " << curLoc.getCol();
    300   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
    301 }
    302 
    303 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
    304   SmallString<128> Str;
    305   raw_svector_ostream OS(Str);
    306   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    307     emitLineNumberAsDotLoc(*MI);
    308   printInstruction(MI, OS);
    309   OutStreamer.EmitRawText(OS.str());
    310 }
    311 
    312 void NVPTXAsmPrinter::printReturnValStr(const Function *F,
    313                                         raw_ostream &O)
    314 {
    315   const DataLayout *TD = TM.getDataLayout();
    316   const TargetLowering *TLI = TM.getTargetLowering();
    317 
    318   Type *Ty = F->getReturnType();
    319 
    320   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
    321 
    322   if (Ty->getTypeID() == Type::VoidTyID)
    323     return;
    324 
    325   O << " (";
    326 
    327   if (isABI) {
    328     if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
    329       unsigned size = 0;
    330       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    331         size = ITy->getBitWidth();
    332         if (size < 32) size = 32;
    333       } else {
    334         assert(Ty->isFloatingPointTy() &&
    335                "Floating point type expected here");
    336         size = Ty->getPrimitiveSizeInBits();
    337       }
    338 
    339       O << ".param .b" << size << " func_retval0";
    340     }
    341     else if (isa<PointerType>(Ty)) {
    342       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
    343             << " func_retval0";
    344     } else {
    345       if ((Ty->getTypeID() == Type::StructTyID) ||
    346           isa<VectorType>(Ty)) {
    347         SmallVector<EVT, 16> vtparts;
    348         ComputeValueVTs(*TLI, Ty, vtparts);
    349         unsigned totalsz = 0;
    350         for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
    351           unsigned elems = 1;
    352           EVT elemtype = vtparts[i];
    353           if (vtparts[i].isVector()) {
    354             elems = vtparts[i].getVectorNumElements();
    355             elemtype = vtparts[i].getVectorElementType();
    356           }
    357           for (unsigned j=0, je=elems; j!=je; ++j) {
    358             unsigned sz = elemtype.getSizeInBits();
    359             if (elemtype.isInteger() && (sz < 8)) sz = 8;
    360             totalsz += sz/8;
    361           }
    362         }
    363         unsigned retAlignment = 0;
    364         if (!llvm::getAlign(*F, 0, retAlignment))
    365           retAlignment = TD->getABITypeAlignment(Ty);
    366         O << ".param .align "
    367             << retAlignment
    368             << " .b8 func_retval0["
    369             << totalsz << "]";
    370       } else
    371         assert(false &&
    372                "Unknown return type");
    373     }
    374   } else {
    375     SmallVector<EVT, 16> vtparts;
    376     ComputeValueVTs(*TLI, Ty, vtparts);
    377     unsigned idx = 0;
    378     for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
    379       unsigned elems = 1;
    380       EVT elemtype = vtparts[i];
    381       if (vtparts[i].isVector()) {
    382         elems = vtparts[i].getVectorNumElements();
    383         elemtype = vtparts[i].getVectorElementType();
    384       }
    385 
    386       for (unsigned j=0, je=elems; j!=je; ++j) {
    387         unsigned sz = elemtype.getSizeInBits();
    388         if (elemtype.isInteger() && (sz < 32)) sz = 32;
    389         O << ".reg .b" << sz << " func_retval" << idx;
    390         if (j<je-1) O << ", ";
    391         ++idx;
    392       }
    393       if (i < e-1)
    394         O << ", ";
    395     }
    396   }
    397   O << ") ";
    398   return;
    399 }
    400 
    401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
    402                                         raw_ostream &O) {
    403   const Function *F = MF.getFunction();
    404   printReturnValStr(F, O);
    405 }
    406 
    407 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
    408   SmallString<128> Str;
    409   raw_svector_ostream O(Str);
    410 
    411   // Set up
    412   MRI = &MF->getRegInfo();
    413   F = MF->getFunction();
    414   emitLinkageDirective(F,O);
    415   if (llvm::isKernelFunction(*F))
    416     O << ".entry ";
    417   else {
    418     O << ".func ";
    419     printReturnValStr(*MF, O);
    420   }
    421 
    422   O << *CurrentFnSym;
    423 
    424   emitFunctionParamList(*MF, O);
    425 
    426   if (llvm::isKernelFunction(*F))
    427     emitKernelFunctionDirectives(*F, O);
    428 
    429   OutStreamer.EmitRawText(O.str());
    430 
    431   prevDebugLoc = DebugLoc();
    432 }
    433 
    434 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
    435   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
    436   unsigned numRegClasses = TRI.getNumRegClasses();
    437   VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
    438   OutStreamer.EmitRawText(StringRef("{\n"));
    439   setAndEmitFunctionVirtualRegisters(*MF);
    440 
    441   SmallString<128> Str;
    442   raw_svector_ostream O(Str);
    443   emitDemotedVars(MF->getFunction(), O);
    444   OutStreamer.EmitRawText(O.str());
    445 }
    446 
    447 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
    448   OutStreamer.EmitRawText(StringRef("}\n"));
    449   delete []VRidGlobal2LocalMap;
    450 }
    451 
    452 
    453 void
    454 NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
    455                                               raw_ostream &O) const {
    456   // If the NVVM IR has some of reqntid* specified, then output
    457   // the reqntid directive, and set the unspecified ones to 1.
    458   // If none of reqntid* is specified, don't output reqntid directive.
    459   unsigned reqntidx, reqntidy, reqntidz;
    460   bool specified = false;
    461   if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
    462   else specified = true;
    463   if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
    464   else specified = true;
    465   if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
    466   else specified = true;
    467 
    468   if (specified)
    469     O << ".reqntid " << reqntidx << ", "
    470     << reqntidy << ", " << reqntidz << "\n";
    471 
    472   // If the NVVM IR has some of maxntid* specified, then output
    473   // the maxntid directive, and set the unspecified ones to 1.
    474   // If none of maxntid* is specified, don't output maxntid directive.
    475   unsigned maxntidx, maxntidy, maxntidz;
    476   specified = false;
    477   if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
    478   else specified = true;
    479   if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
    480   else specified = true;
    481   if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
    482   else specified = true;
    483 
    484   if (specified)
    485     O << ".maxntid " << maxntidx << ", "
    486     << maxntidy << ", " << maxntidz << "\n";
    487 
    488   unsigned mincta;
    489   if (llvm::getMinCTASm(F, mincta))
    490     O << ".minnctapersm " << mincta << "\n";
    491 }
    492 
    493 void
    494 NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
    495                                         raw_ostream &O) {
    496   const TargetRegisterClass * RC = MRI->getRegClass(vr);
    497   unsigned id = RC->getID();
    498 
    499   std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
    500   unsigned mapped_vr = regmap[vr];
    501 
    502   if (!isVec) {
    503     O << getNVPTXRegClassStr(RC) << mapped_vr;
    504     return;
    505   }
    506   report_fatal_error("Bad register!");
    507 }
    508 
    509 void
    510 NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
    511                                      raw_ostream &O) {
    512   getVirtualRegisterName(vr, isVec, O);
    513 }
    514 
    515 void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
    516                                                 const char *Modifier,
    517                                                 raw_ostream &O) {
    518   static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
    519   int Imm = (int)MO.getImm();
    520   if(0 == strcmp(Modifier, "vecelem"))
    521     O << "_" << vecelem[Imm];
    522   else if(0 == strcmp(Modifier, "vecv4comm1")) {
    523     if((Imm < 0) || (Imm > 3))
    524       O << "//";
    525   }
    526   else if(0 == strcmp(Modifier, "vecv4comm2")) {
    527     if((Imm < 4) || (Imm > 7))
    528       O << "//";
    529   }
    530   else if(0 == strcmp(Modifier, "vecv4pos")) {
    531     if(Imm < 0) Imm = 0;
    532     O << "_" << vecelem[Imm%4];
    533   }
    534   else if(0 == strcmp(Modifier, "vecv2comm1")) {
    535     if((Imm < 0) || (Imm > 1))
    536       O << "//";
    537   }
    538   else if(0 == strcmp(Modifier, "vecv2comm2")) {
    539     if((Imm < 2) || (Imm > 3))
    540       O << "//";
    541   }
    542   else if(0 == strcmp(Modifier, "vecv2pos")) {
    543     if(Imm < 0) Imm = 0;
    544     O << "_" << vecelem[Imm%2];
    545   }
    546   else
    547     llvm_unreachable("Unknown Modifier on immediate operand");
    548 }
    549 
    550 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
    551                                    raw_ostream &O, const char *Modifier) {
    552   const MachineOperand &MO = MI->getOperand(opNum);
    553   switch (MO.getType()) {
    554   case MachineOperand::MO_Register:
    555     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
    556       if (MO.getReg() == NVPTX::VRDepot)
    557         O << DEPOTNAME << getFunctionNumber();
    558       else
    559         O << getRegisterName(MO.getReg());
    560     } else {
    561       if (!Modifier)
    562         emitVirtualRegister(MO.getReg(), false, O);
    563       else {
    564         if (strcmp(Modifier, "vecfull") == 0)
    565           emitVirtualRegister(MO.getReg(), true, O);
    566         else
    567           llvm_unreachable(
    568                  "Don't know how to handle the modifier on virtual register.");
    569       }
    570     }
    571     return;
    572 
    573   case MachineOperand::MO_Immediate:
    574     if (!Modifier)
    575       O << MO.getImm();
    576     else if (strstr(Modifier, "vec") == Modifier)
    577       printVecModifiedImmediate(MO, Modifier, O);
    578     else
    579       llvm_unreachable("Don't know how to handle modifier on immediate operand");
    580     return;
    581 
    582   case MachineOperand::MO_FPImmediate:
    583     printFPConstant(MO.getFPImm(), O);
    584     break;
    585 
    586   case MachineOperand::MO_GlobalAddress:
    587     O << *Mang->getSymbol(MO.getGlobal());
    588     break;
    589 
    590   case MachineOperand::MO_ExternalSymbol: {
    591     const char * symbname = MO.getSymbolName();
    592     if (strstr(symbname, ".PARAM") == symbname) {
    593       unsigned index;
    594       sscanf(symbname+6, "%u[];", &index);
    595       printParamName(index, O);
    596     }
    597     else if (strstr(symbname, ".HLPPARAM") == symbname) {
    598       unsigned index;
    599       sscanf(symbname+9, "%u[];", &index);
    600       O << *CurrentFnSym << "_param_" << index << "_offset";
    601     }
    602     else
    603       O << symbname;
    604     break;
    605   }
    606 
    607   case MachineOperand::MO_MachineBasicBlock:
    608     O << *MO.getMBB()->getSymbol();
    609     return;
    610 
    611   default:
    612     llvm_unreachable("Operand type not supported.");
    613   }
    614 }
    615 
    616 void NVPTXAsmPrinter::
    617 printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
    618 #ifndef __OPTIMIZE__
    619   O << "\t// Implicit def :";
    620   //printOperand(MI, 0);
    621   O << "\n";
    622 #endif
    623 }
    624 
    625 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
    626                                       raw_ostream &O, const char *Modifier) {
    627   printOperand(MI, opNum, O);
    628 
    629   if (Modifier && !strcmp(Modifier, "add")) {
    630     O << ", ";
    631     printOperand(MI, opNum+1, O);
    632   } else {
    633     if (MI->getOperand(opNum+1).isImm() &&
    634         MI->getOperand(opNum+1).getImm() == 0)
    635       return; // don't print ',0' or '+0'
    636     O << "+";
    637     printOperand(MI, opNum+1, O);
    638   }
    639 }
    640 
    641 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
    642                                     raw_ostream &O, const char *Modifier)
    643 {
    644   if (Modifier) {
    645     const MachineOperand &MO = MI->getOperand(opNum);
    646     int Imm = (int)MO.getImm();
    647     if (!strcmp(Modifier, "volatile")) {
    648       if (Imm)
    649         O << ".volatile";
    650     } else if (!strcmp(Modifier, "addsp")) {
    651       switch (Imm) {
    652       case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
    653       case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
    654       case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
    655       case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
    656       case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
    657       case NVPTX::PTXLdStInstCode::GENERIC:
    658         if (!nvptxSubtarget.hasGenericLdSt())
    659           O << ".global";
    660         break;
    661       default:
    662         llvm_unreachable("Wrong Address Space");
    663       }
    664     }
    665     else if (!strcmp(Modifier, "sign")) {
    666       if (Imm==NVPTX::PTXLdStInstCode::Signed)
    667         O << "s";
    668       else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
    669         O << "u";
    670       else
    671         O << "f";
    672     }
    673     else if (!strcmp(Modifier, "vec")) {
    674       if (Imm==NVPTX::PTXLdStInstCode::V2)
    675         O << ".v2";
    676       else if (Imm==NVPTX::PTXLdStInstCode::V4)
    677         O << ".v4";
    678     }
    679     else
    680       llvm_unreachable("Unknown Modifier");
    681   }
    682   else
    683     llvm_unreachable("Empty Modifier");
    684 }
    685 
    686 void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
    687 
    688   emitLinkageDirective(F,O);
    689   if (llvm::isKernelFunction(*F))
    690     O << ".entry ";
    691   else
    692     O << ".func ";
    693   printReturnValStr(F, O);
    694   O << *CurrentFnSym << "\n";
    695   emitFunctionParamList(F, O);
    696   O << ";\n";
    697 }
    698 
    699 static bool usedInGlobalVarDef(const Constant *C)
    700 {
    701   if (!C)
    702     return false;
    703 
    704   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    705     if (GV->getName().str() == "llvm.used")
    706       return false;
    707     return true;
    708   }
    709 
    710   for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
    711       ui!=ue; ++ui) {
    712     const Constant *C = dyn_cast<Constant>(*ui);
    713     if (usedInGlobalVarDef(C))
    714       return true;
    715   }
    716   return false;
    717 }
    718 
    719 static bool usedInOneFunc(const User *U, Function const *&oneFunc)
    720 {
    721   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
    722     if (othergv->getName().str() == "llvm.used")
    723       return true;
    724   }
    725 
    726   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
    727     if (instr->getParent() && instr->getParent()->getParent()) {
    728       const Function *curFunc = instr->getParent()->getParent();
    729       if (oneFunc && (curFunc != oneFunc))
    730         return false;
    731       oneFunc = curFunc;
    732       return true;
    733     }
    734     else
    735       return false;
    736   }
    737 
    738   if (const MDNode *md = dyn_cast<MDNode>(U))
    739     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
    740         (md->getName().str() == "llvm.dbg.sp")))
    741       return true;
    742 
    743 
    744   for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
    745       ui!=ue; ++ui) {
    746     if (usedInOneFunc(*ui, oneFunc) == false)
    747       return false;
    748   }
    749   return true;
    750 }
    751 
    752 /* Find out if a global variable can be demoted to local scope.
    753  * Currently, this is valid for CUDA shared variables, which have local
    754  * scope and global lifetime. So the conditions to check are :
    755  * 1. Is the global variable in shared address space?
    756  * 2. Does it have internal linkage?
    757  * 3. Is the global variable referenced only in one function?
    758  */
    759 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
    760   if (gv->hasInternalLinkage() == false)
    761     return false;
    762   const PointerType *Pty = gv->getType();
    763   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
    764     return false;
    765 
    766   const Function *oneFunc = 0;
    767 
    768   bool flag = usedInOneFunc(gv, oneFunc);
    769   if (flag == false)
    770     return false;
    771   if (!oneFunc)
    772     return false;
    773   f = oneFunc;
    774   return true;
    775 }
    776 
    777 static bool useFuncSeen(const Constant *C,
    778                         llvm::DenseMap<const Function *, bool> &seenMap) {
    779   for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
    780       ui!=ue; ++ui) {
    781     if (const Constant *cu = dyn_cast<Constant>(*ui)) {
    782       if (useFuncSeen(cu, seenMap))
    783         return true;
    784     } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
    785       const BasicBlock *bb = I->getParent();
    786       if (!bb) continue;
    787       const Function *caller = bb->getParent();
    788       if (!caller) continue;
    789       if (seenMap.find(caller) != seenMap.end())
    790         return true;
    791     }
    792   }
    793   return false;
    794 }
    795 
    796 void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
    797   llvm::DenseMap<const Function *, bool> seenMap;
    798   for (Module::const_iterator FI=M.begin(), FE=M.end();
    799       FI!=FE; ++FI) {
    800     const Function *F = FI;
    801 
    802     if (F->isDeclaration()) {
    803       if (F->use_empty())
    804         continue;
    805       if (F->getIntrinsicID())
    806         continue;
    807       CurrentFnSym = Mang->getSymbol(F);
    808       emitDeclaration(F, O);
    809       continue;
    810     }
    811     for (Value::const_use_iterator iter=F->use_begin(),
    812         iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
    813       if (const Constant *C = dyn_cast<Constant>(*iter)) {
    814         if (usedInGlobalVarDef(C)) {
    815           // The use is in the initialization of a global variable
    816           // that is a function pointer, so print a declaration
    817           // for the original function
    818           CurrentFnSym = Mang->getSymbol(F);
    819           emitDeclaration(F, O);
    820           break;
    821         }
    822         // Emit a declaration of this function if the function that
    823         // uses this constant expr has already been seen.
    824         if (useFuncSeen(C, seenMap)) {
    825           CurrentFnSym = Mang->getSymbol(F);
    826           emitDeclaration(F, O);
    827           break;
    828         }
    829       }
    830 
    831       if (!isa<Instruction>(*iter)) continue;
    832       const Instruction *instr = cast<Instruction>(*iter);
    833       const BasicBlock *bb = instr->getParent();
    834       if (!bb) continue;
    835       const Function *caller = bb->getParent();
    836       if (!caller) continue;
    837 
    838       // If a caller has already been seen, then the caller is
    839       // appearing in the module before the callee. so print out
    840       // a declaration for the callee.
    841       if (seenMap.find(caller) != seenMap.end()) {
    842         CurrentFnSym = Mang->getSymbol(F);
    843         emitDeclaration(F, O);
    844         break;
    845       }
    846     }
    847     seenMap[F] = true;
    848   }
    849 }
    850 
    851 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
    852   DebugInfoFinder DbgFinder;
    853   DbgFinder.processModule(M);
    854 
    855   unsigned i=1;
    856   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
    857       E = DbgFinder.compile_unit_end(); I != E; ++I) {
    858     DICompileUnit DIUnit(*I);
    859     StringRef Filename(DIUnit.getFilename());
    860     StringRef Dirname(DIUnit.getDirectory());
    861     SmallString<128> FullPathName = Dirname;
    862     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    863       sys::path::append(FullPathName, Filename);
    864       Filename = FullPathName.str();
    865     }
    866     if (filenameMap.find(Filename.str()) != filenameMap.end())
    867       continue;
    868     filenameMap[Filename.str()] = i;
    869     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
    870     ++i;
    871   }
    872 
    873   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
    874       E = DbgFinder.subprogram_end(); I != E; ++I) {
    875     DISubprogram SP(*I);
    876     StringRef Filename(SP.getFilename());
    877     StringRef Dirname(SP.getDirectory());
    878     SmallString<128> FullPathName = Dirname;
    879     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
    880       sys::path::append(FullPathName, Filename);
    881       Filename = FullPathName.str();
    882     }
    883     if (filenameMap.find(Filename.str()) != filenameMap.end())
    884       continue;
    885     filenameMap[Filename.str()] = i;
    886     ++i;
    887   }
    888 }
    889 
    890 bool NVPTXAsmPrinter::doInitialization (Module &M) {
    891 
    892   SmallString<128> Str1;
    893   raw_svector_ostream OS1(Str1);
    894 
    895   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
    896   MMI->AnalyzeModule(M);
    897 
    898   // We need to call the parent's one explicitly.
    899   //bool Result = AsmPrinter::doInitialization(M);
    900 
    901   // Initialize TargetLoweringObjectFile.
    902   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
    903           .Initialize(OutContext, TM);
    904 
    905   Mang = new Mangler(OutContext, *TM.getDataLayout());
    906 
    907   // Emit header before any dwarf directives are emitted below.
    908   emitHeader(M, OS1);
    909   OutStreamer.EmitRawText(OS1.str());
    910 
    911 
    912   // Already commented out
    913   //bool Result = AsmPrinter::doInitialization(M);
    914 
    915 
    916   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    917     recordAndEmitFilenames(M);
    918 
    919   SmallString<128> Str2;
    920   raw_svector_ostream OS2(Str2);
    921 
    922   emitDeclarations(M, OS2);
    923 
    924   // As ptxas does not support forward references of globals, we need to first
    925   // sort the list of module-level globals in def-use order. We visit each
    926   // global variable in order, and ensure that we emit it *after* its dependent
    927   // globals. We use a little extra memory maintaining both a set and a list to
    928   // have fast searches while maintaining a strict ordering.
    929   SmallVector<GlobalVariable*,8> Globals;
    930   DenseSet<GlobalVariable*> GVVisited;
    931   DenseSet<GlobalVariable*> GVVisiting;
    932 
    933   // Visit each global variable, in order
    934   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
    935        I != E; ++I)
    936     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
    937 
    938   assert(GVVisited.size() == M.getGlobalList().size() &&
    939          "Missed a global variable");
    940   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
    941 
    942   // Print out module-level global variables in proper order
    943   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
    944     printModuleLevelGV(Globals[i], OS2);
    945 
    946   OS2 << '\n';
    947 
    948   OutStreamer.EmitRawText(OS2.str());
    949   return false;  // success
    950 }
    951 
    952 void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
    953   O << "//\n";
    954   O << "// Generated by LLVM NVPTX Back-End\n";
    955   O << "//\n";
    956   O << "\n";
    957 
    958   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
    959   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
    960 
    961   O << ".target ";
    962   O << nvptxSubtarget.getTargetName();
    963 
    964   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
    965     O << ", texmode_independent";
    966   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
    967     if (!nvptxSubtarget.hasDouble())
    968       O << ", map_f64_to_f32";
    969   }
    970 
    971   if (MAI->doesSupportDebugInformation())
    972     O << ", debug";
    973 
    974   O << "\n";
    975 
    976   O << ".address_size ";
    977   if (nvptxSubtarget.is64Bit())
    978     O << "64";
    979   else
    980     O << "32";
    981   O << "\n";
    982 
    983   O << "\n";
    984 }
    985 
    986 bool NVPTXAsmPrinter::doFinalization(Module &M) {
    987   // XXX Temproarily remove global variables so that doFinalization() will not
    988   // emit them again (global variables are emitted at beginning).
    989 
    990   Module::GlobalListType &global_list = M.getGlobalList();
    991   int i, n = global_list.size();
    992   GlobalVariable **gv_array = new GlobalVariable* [n];
    993 
    994   // first, back-up GlobalVariable in gv_array
    995   i = 0;
    996   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
    997       I != E; ++I)
    998     gv_array[i++] = &*I;
    999 
   1000   // second, empty global_list
   1001   while (!global_list.empty())
   1002     global_list.remove(global_list.begin());
   1003 
   1004   // call doFinalization
   1005   bool ret = AsmPrinter::doFinalization(M);
   1006 
   1007   // now we restore global variables
   1008   for (i = 0; i < n; i ++)
   1009     global_list.insert(global_list.end(), gv_array[i]);
   1010 
   1011   delete[] gv_array;
   1012   return ret;
   1013 
   1014 
   1015   //bool Result = AsmPrinter::doFinalization(M);
   1016   // Instead of calling the parents doFinalization, we may
   1017   // clone parents doFinalization and customize here.
   1018   // Currently, we if NVISA out the EmitGlobals() in
   1019   // parent's doFinalization, which is too intrusive.
   1020   //
   1021   // Same for the doInitialization.
   1022   //return Result;
   1023 }
   1024 
   1025 // This function emits appropriate linkage directives for
   1026 // functions and global variables.
   1027 //
   1028 // extern function declaration            -> .extern
   1029 // extern function definition             -> .visible
   1030 // external global variable with init     -> .visible
   1031 // external without init                  -> .extern
   1032 // appending                              -> not allowed, assert.
   1033 
   1034 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
   1035 {
   1036   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
   1037     if (V->hasExternalLinkage()) {
   1038       if (isa<GlobalVariable>(V)) {
   1039         const GlobalVariable *GVar = cast<GlobalVariable>(V);
   1040         if (GVar) {
   1041           if (GVar->hasInitializer())
   1042             O << ".visible ";
   1043           else
   1044             O << ".extern ";
   1045         }
   1046       } else if (V->isDeclaration())
   1047         O << ".extern ";
   1048       else
   1049         O << ".visible ";
   1050     } else if (V->hasAppendingLinkage()) {
   1051       std::string msg;
   1052       msg.append("Error: ");
   1053       msg.append("Symbol ");
   1054       if (V->hasName())
   1055         msg.append(V->getName().str());
   1056       msg.append("has unsupported appending linkage type");
   1057       llvm_unreachable(msg.c_str());
   1058     }
   1059   }
   1060 }
   1061 
   1062 
   1063 void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
   1064                                          bool processDemoted) {
   1065 
   1066   // Skip meta data
   1067   if (GVar->hasSection()) {
   1068     if (GVar->getSection() == "llvm.metadata")
   1069       return;
   1070   }
   1071 
   1072   const DataLayout *TD = TM.getDataLayout();
   1073 
   1074   // GlobalVariables are always constant pointers themselves.
   1075   const PointerType *PTy = GVar->getType();
   1076   Type *ETy = PTy->getElementType();
   1077 
   1078   if (GVar->hasExternalLinkage()) {
   1079     if (GVar->hasInitializer())
   1080       O << ".visible ";
   1081     else
   1082       O << ".extern ";
   1083   }
   1084 
   1085   if (llvm::isTexture(*GVar)) {
   1086     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
   1087     return;
   1088   }
   1089 
   1090   if (llvm::isSurface(*GVar)) {
   1091     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
   1092     return;
   1093   }
   1094 
   1095   if (GVar->isDeclaration()) {
   1096     // (extern) declarations, no definition or initializer
   1097     // Currently the only known declaration is for an automatic __local
   1098     // (.shared) promoted to global.
   1099     emitPTXGlobalVariable(GVar, O);
   1100     O << ";\n";
   1101     return;
   1102   }
   1103 
   1104   if (llvm::isSampler(*GVar)) {
   1105     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
   1106 
   1107     Constant *Initializer = NULL;
   1108     if (GVar->hasInitializer())
   1109       Initializer = GVar->getInitializer();
   1110     ConstantInt *CI = NULL;
   1111     if (Initializer)
   1112       CI = dyn_cast<ConstantInt>(Initializer);
   1113     if (CI) {
   1114       unsigned sample=CI->getZExtValue();
   1115 
   1116       O << " = { ";
   1117 
   1118       for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
   1119           __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
   1120         O << "addr_mode_" << i << " = ";
   1121         switch (addr) {
   1122         case 0: O << "wrap"; break;
   1123         case 1: O << "clamp_to_border"; break;
   1124         case 2: O << "clamp_to_edge"; break;
   1125         case 3: O << "wrap"; break;
   1126         case 4: O << "mirror"; break;
   1127         }
   1128         O <<", ";
   1129       }
   1130       O << "filter_mode = ";
   1131       switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
   1132       case 0: O << "nearest"; break;
   1133       case 1: O << "linear";  break;
   1134       case 2: assert ( 0 && "Anisotropic filtering is not supported");
   1135       default: O << "nearest"; break;
   1136       }
   1137       if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
   1138         O << ", force_unnormalized_coords = 1";
   1139       }
   1140       O << " }";
   1141     }
   1142 
   1143     O << ";\n";
   1144     return;
   1145   }
   1146 
   1147   if (GVar->hasPrivateLinkage()) {
   1148 
   1149     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
   1150       return;
   1151 
   1152     // FIXME - need better way (e.g. Metadata) to avoid generating this global
   1153     if (!strncmp(GVar->getName().data(), "filename", 8))
   1154       return;
   1155     if (GVar->use_empty())
   1156       return;
   1157   }
   1158 
   1159   const Function *demotedFunc = 0;
   1160   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
   1161     O << "// " << GVar->getName().str() << " has been demoted\n";
   1162     if (localDecls.find(demotedFunc) != localDecls.end())
   1163       localDecls[demotedFunc].push_back(GVar);
   1164     else {
   1165       std::vector<GlobalVariable *> temp;
   1166       temp.push_back(GVar);
   1167       localDecls[demotedFunc] = temp;
   1168     }
   1169     return;
   1170   }
   1171 
   1172   O << ".";
   1173   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1174   if (GVar->getAlignment() == 0)
   1175     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1176   else
   1177     O << " .align " << GVar->getAlignment();
   1178 
   1179 
   1180   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
   1181     O << " .";
   1182     O << getPTXFundamentalTypeStr(ETy, false);
   1183     O << " ";
   1184     O << *Mang->getSymbol(GVar);
   1185 
   1186     // Ptx allows variable initilization only for constant and global state
   1187     // spaces.
   1188     if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1189         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
   1190         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
   1191         && GVar->hasInitializer()) {
   1192       Constant *Initializer = GVar->getInitializer();
   1193       if (!Initializer->isNullValue()) {
   1194         O << " = " ;
   1195         printScalarConstant(Initializer, O);
   1196       }
   1197     }
   1198   } else {
   1199     unsigned int ElementSize =0;
   1200 
   1201     // Although PTX has direct support for struct type and array type and
   1202     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
   1203     // targets that support these high level field accesses. Structs, arrays
   1204     // and vectors are lowered into arrays of bytes.
   1205     switch (ETy->getTypeID()) {
   1206     case Type::StructTyID:
   1207     case Type::ArrayTyID:
   1208     case Type::VectorTyID:
   1209       ElementSize = TD->getTypeStoreSize(ETy);
   1210       // Ptx allows variable initilization only for constant and
   1211       // global state spaces.
   1212       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
   1213           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
   1214           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
   1215           && GVar->hasInitializer()) {
   1216         Constant *Initializer = GVar->getInitializer();
   1217         if (!isa<UndefValue>(Initializer) &&
   1218             !Initializer->isNullValue()) {
   1219           AggBuffer aggBuffer(ElementSize, O, *this);
   1220           bufferAggregateConstant(Initializer, &aggBuffer);
   1221           if (aggBuffer.numSymbols) {
   1222             if (nvptxSubtarget.is64Bit()) {
   1223               O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
   1224               O << ElementSize/8;
   1225             }
   1226             else {
   1227               O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
   1228               O << ElementSize/4;
   1229             }
   1230             O << "]";
   1231           }
   1232           else {
   1233             O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
   1234             O << ElementSize;
   1235             O << "]";
   1236           }
   1237           O << " = {" ;
   1238           aggBuffer.print();
   1239           O << "}";
   1240         }
   1241         else {
   1242           O << " .b8 " << *Mang->getSymbol(GVar) ;
   1243           if (ElementSize) {
   1244             O <<"[" ;
   1245             O << ElementSize;
   1246             O << "]";
   1247           }
   1248         }
   1249       }
   1250       else {
   1251         O << " .b8 " << *Mang->getSymbol(GVar);
   1252         if (ElementSize) {
   1253           O <<"[" ;
   1254           O << ElementSize;
   1255           O << "]";
   1256         }
   1257       }
   1258       break;
   1259     default:
   1260       assert( 0 && "type not supported yet");
   1261     }
   1262 
   1263   }
   1264   O << ";\n";
   1265 }
   1266 
   1267 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
   1268   if (localDecls.find(f) == localDecls.end())
   1269     return;
   1270 
   1271   std::vector<GlobalVariable *> &gvars = localDecls[f];
   1272 
   1273   for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
   1274     O << "\t// demoted variable\n\t";
   1275     printModuleLevelGV(gvars[i], O, true);
   1276   }
   1277 }
   1278 
   1279 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   1280                                           raw_ostream &O) const {
   1281   switch (AddressSpace) {
   1282   case llvm::ADDRESS_SPACE_LOCAL:
   1283     O << "local" ;
   1284     break;
   1285   case llvm::ADDRESS_SPACE_GLOBAL:
   1286     O << "global" ;
   1287     break;
   1288   case llvm::ADDRESS_SPACE_CONST:
   1289     // This logic should be consistent with that in
   1290     // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
   1291     if (nvptxSubtarget.hasGenericLdSt())
   1292       O << "global" ;
   1293     else
   1294       O << "const" ;
   1295     break;
   1296   case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
   1297     O << "const" ;
   1298     break;
   1299   case llvm::ADDRESS_SPACE_SHARED:
   1300     O << "shared" ;
   1301     break;
   1302   default:
   1303     report_fatal_error("Bad address space found while emitting PTX");
   1304     break;
   1305   }
   1306 }
   1307 
   1308 std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
   1309                                                       bool useB4PTR) const {
   1310   switch (Ty->getTypeID()) {
   1311   default:
   1312     llvm_unreachable("unexpected type");
   1313     break;
   1314   case Type::IntegerTyID: {
   1315     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
   1316     if (NumBits == 1)
   1317       return "pred";
   1318     else if (NumBits <= 64) {
   1319       std::string name = "u";
   1320       return name + utostr(NumBits);
   1321     } else {
   1322       llvm_unreachable("Integer too large");
   1323       break;
   1324     }
   1325     break;
   1326   }
   1327   case Type::FloatTyID:
   1328     return "f32";
   1329   case Type::DoubleTyID:
   1330     return "f64";
   1331   case Type::PointerTyID:
   1332     if (nvptxSubtarget.is64Bit())
   1333       if (useB4PTR) return "b64";
   1334       else return "u64";
   1335     else
   1336       if (useB4PTR) return "b32";
   1337       else return "u32";
   1338   }
   1339   llvm_unreachable("unexpected type");
   1340   return NULL;
   1341 }
   1342 
   1343 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
   1344                                             raw_ostream &O) {
   1345 
   1346   const DataLayout *TD = TM.getDataLayout();
   1347 
   1348   // GlobalVariables are always constant pointers themselves.
   1349   const PointerType *PTy = GVar->getType();
   1350   Type *ETy = PTy->getElementType();
   1351 
   1352   O << ".";
   1353   emitPTXAddressSpace(PTy->getAddressSpace(), O);
   1354   if (GVar->getAlignment() == 0)
   1355     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
   1356   else
   1357     O << " .align " << GVar->getAlignment();
   1358 
   1359   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
   1360     O << " .";
   1361     O << getPTXFundamentalTypeStr(ETy);
   1362     O << " ";
   1363     O << *Mang->getSymbol(GVar);
   1364     return;
   1365   }
   1366 
   1367   int64_t ElementSize =0;
   1368 
   1369   // Although PTX has direct support for struct type and array type and LLVM IR
   1370   // is very similar to PTX, the LLVM CodeGen does not support for targets that
   1371   // support these high level field accesses. Structs and arrays are lowered
   1372   // into arrays of bytes.
   1373   switch (ETy->getTypeID()) {
   1374   case Type::StructTyID:
   1375   case Type::ArrayTyID:
   1376   case Type::VectorTyID:
   1377     ElementSize = TD->getTypeStoreSize(ETy);
   1378     O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
   1379     if (ElementSize) {
   1380       O << itostr(ElementSize) ;
   1381     }
   1382     O << "]";
   1383     break;
   1384   default:
   1385     assert( 0 && "type not supported yet");
   1386   }
   1387   return ;
   1388 }
   1389 
   1390 
   1391 static unsigned int
   1392 getOpenCLAlignment(const DataLayout *TD,
   1393                    Type *Ty) {
   1394   if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
   1395     return TD->getPrefTypeAlignment(Ty);
   1396 
   1397   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
   1398   if (ATy)
   1399     return getOpenCLAlignment(TD, ATy->getElementType());
   1400 
   1401   const VectorType *VTy = dyn_cast<VectorType>(Ty);
   1402   if (VTy) {
   1403     Type *ETy = VTy->getElementType();
   1404     unsigned int numE = VTy->getNumElements();
   1405     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
   1406     if (numE == 3)
   1407       return 4*alignE;
   1408     else
   1409       return numE*alignE;
   1410   }
   1411 
   1412   const StructType *STy = dyn_cast<StructType>(Ty);
   1413   if (STy) {
   1414     unsigned int alignStruct = 1;
   1415     // Go through each element of the struct and find the
   1416     // largest alignment.
   1417     for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
   1418       Type *ETy = STy->getElementType(i);
   1419       unsigned int align = getOpenCLAlignment(TD, ETy);
   1420       if (align > alignStruct)
   1421         alignStruct = align;
   1422     }
   1423     return alignStruct;
   1424   }
   1425 
   1426   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
   1427   if (FTy)
   1428     return TD->getPointerPrefAlignment();
   1429   return TD->getPrefTypeAlignment(Ty);
   1430 }
   1431 
   1432 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
   1433                                      int paramIndex, raw_ostream &O) {
   1434   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1435       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
   1436     O << *CurrentFnSym << "_param_" << paramIndex;
   1437   else {
   1438     std::string argName = I->getName();
   1439     const char *p = argName.c_str();
   1440     while (*p) {
   1441       if (*p == '.')
   1442         O << "_";
   1443       else
   1444         O << *p;
   1445       p++;
   1446     }
   1447   }
   1448 }
   1449 
   1450 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   1451   Function::const_arg_iterator I, E;
   1452   int i = 0;
   1453 
   1454   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
   1455       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
   1456     O << *CurrentFnSym << "_param_" << paramIndex;
   1457     return;
   1458   }
   1459 
   1460   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
   1461     if (i==paramIndex) {
   1462       printParamName(I, paramIndex, O);
   1463       return;
   1464     }
   1465   }
   1466   llvm_unreachable("paramIndex out of bound");
   1467 }
   1468 
   1469 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
   1470                                             raw_ostream &O) {
   1471   const DataLayout *TD = TM.getDataLayout();
   1472   const AttributeSet &PAL = F->getAttributes();
   1473   const TargetLowering *TLI = TM.getTargetLowering();
   1474   Function::const_arg_iterator I, E;
   1475   unsigned paramIndex = 0;
   1476   bool first = true;
   1477   bool isKernelFunc = llvm::isKernelFunction(*F);
   1478   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
   1479   MVT thePointerTy = TLI->getPointerTy();
   1480 
   1481   O << "(\n";
   1482 
   1483   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
   1484     const Type *Ty = I->getType();
   1485 
   1486     if (!first)
   1487       O << ",\n";
   1488 
   1489     first = false;
   1490 
   1491     // Handle image/sampler parameters
   1492     if (llvm::isSampler(*I) || llvm::isImage(*I)) {
   1493       if (llvm::isImage(*I)) {
   1494         std::string sname = I->getName();
   1495         if (llvm::isImageWriteOnly(*I))
   1496           O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
   1497         else // Default image is read_only
   1498           O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
   1499       }
   1500       else // Should be llvm::isSampler(*I)
   1501         O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
   1502         << paramIndex;
   1503       continue;
   1504     }
   1505 
   1506     if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) {
   1507       // Just a scalar
   1508       const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1509       if (isKernelFunc) {
   1510         if (PTy) {
   1511           // Special handling for pointer arguments to kernel
   1512           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
   1513 
   1514           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
   1515             Type *ETy = PTy->getElementType();
   1516             int addrSpace = PTy->getAddressSpace();
   1517             switch(addrSpace) {
   1518             default:
   1519               O << ".ptr ";
   1520               break;
   1521             case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
   1522               O << ".ptr .const ";
   1523               break;
   1524             case llvm::ADDRESS_SPACE_SHARED:
   1525               O << ".ptr .shared ";
   1526               break;
   1527             case llvm::ADDRESS_SPACE_GLOBAL:
   1528             case llvm::ADDRESS_SPACE_CONST:
   1529               O << ".ptr .global ";
   1530               break;
   1531             }
   1532             O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
   1533           }
   1534           printParamName(I, paramIndex, O);
   1535           continue;
   1536         }
   1537 
   1538         // non-pointer scalar to kernel func
   1539         O << "\t.param ."
   1540             << getPTXFundamentalTypeStr(Ty) << " ";
   1541         printParamName(I, paramIndex, O);
   1542         continue;
   1543       }
   1544       // Non-kernel function, just print .param .b<size> for ABI
   1545       // and .reg .b<size> for non ABY
   1546       unsigned sz = 0;
   1547       if (isa<IntegerType>(Ty)) {
   1548         sz = cast<IntegerType>(Ty)->getBitWidth();
   1549         if (sz < 32) sz = 32;
   1550       }
   1551       else if (isa<PointerType>(Ty))
   1552         sz = thePointerTy.getSizeInBits();
   1553       else
   1554         sz = Ty->getPrimitiveSizeInBits();
   1555       if (isABI)
   1556         O << "\t.param .b" << sz << " ";
   1557       else
   1558         O << "\t.reg .b" << sz << " ";
   1559       printParamName(I, paramIndex, O);
   1560       continue;
   1561     }
   1562 
   1563     // param has byVal attribute. So should be a pointer
   1564     const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1565     assert(PTy &&
   1566            "Param with byval attribute should be a pointer type");
   1567     Type *ETy = PTy->getElementType();
   1568 
   1569     if (isABI || isKernelFunc) {
   1570       // Just print .param .b8 .align <a> .param[size];
   1571       // <a> = PAL.getparamalignment
   1572       // size = typeallocsize of element type
   1573       unsigned align = PAL.getParamAlignment(paramIndex+1);
   1574       if (align == 0)
   1575         align = TD->getABITypeAlignment(ETy);
   1576 
   1577       unsigned sz = TD->getTypeAllocSize(ETy);
   1578       O << "\t.param .align " << align
   1579           << " .b8 ";
   1580       printParamName(I, paramIndex, O);
   1581       O << "[" << sz << "]";
   1582       continue;
   1583     } else {
   1584       // Split the ETy into constituent parts and
   1585       // print .param .b<size> <name> for each part.
   1586       // Further, if a part is vector, print the above for
   1587       // each vector element.
   1588       SmallVector<EVT, 16> vtparts;
   1589       ComputeValueVTs(*TLI, ETy, vtparts);
   1590       for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
   1591         unsigned elems = 1;
   1592         EVT elemtype = vtparts[i];
   1593         if (vtparts[i].isVector()) {
   1594           elems = vtparts[i].getVectorNumElements();
   1595           elemtype = vtparts[i].getVectorElementType();
   1596         }
   1597 
   1598         for (unsigned j=0,je=elems; j!=je; ++j) {
   1599           unsigned sz = elemtype.getSizeInBits();
   1600           if (elemtype.isInteger() && (sz < 32)) sz = 32;
   1601           O << "\t.reg .b" << sz << " ";
   1602           printParamName(I, paramIndex, O);
   1603           if (j<je-1) O << ",\n";
   1604           ++paramIndex;
   1605         }
   1606         if (i<e-1)
   1607           O << ",\n";
   1608       }
   1609       --paramIndex;
   1610       continue;
   1611     }
   1612   }
   1613 
   1614   O << "\n)\n";
   1615 }
   1616 
   1617 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   1618                                             raw_ostream &O) {
   1619   const Function *F = MF.getFunction();
   1620   emitFunctionParamList(F, O);
   1621 }
   1622 
   1623 
   1624 void NVPTXAsmPrinter::
   1625 setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   1626   SmallString<128> Str;
   1627   raw_svector_ostream O(Str);
   1628 
   1629   // Map the global virtual register number to a register class specific
   1630   // virtual register number starting from 1 with that class.
   1631   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
   1632   //unsigned numRegClasses = TRI->getNumRegClasses();
   1633 
   1634   // Emit the Fake Stack Object
   1635   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1636   int NumBytes = (int) MFI->getStackSize();
   1637   if (NumBytes) {
   1638     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
   1639         << DEPOTNAME
   1640         << getFunctionNumber() << "[" << NumBytes << "];\n";
   1641     if (nvptxSubtarget.is64Bit()) {
   1642       O << "\t.reg .b64 \t%SP;\n";
   1643       O << "\t.reg .b64 \t%SPL;\n";
   1644     }
   1645     else {
   1646       O << "\t.reg .b32 \t%SP;\n";
   1647       O << "\t.reg .b32 \t%SPL;\n";
   1648     }
   1649   }
   1650 
   1651   // Go through all virtual registers to establish the mapping between the
   1652   // global virtual
   1653   // register number and the per class virtual register number.
   1654   // We use the per class virtual register number in the ptx output.
   1655   unsigned int numVRs = MRI->getNumVirtRegs();
   1656   for (unsigned i=0; i< numVRs; i++) {
   1657     unsigned int vr = TRI->index2VirtReg(i);
   1658     const TargetRegisterClass *RC = MRI->getRegClass(vr);
   1659     std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
   1660     int n = regmap.size();
   1661     regmap.insert(std::make_pair(vr, n+1));
   1662   }
   1663 
   1664   // Emit register declarations
   1665   // @TODO: Extract out the real register usage
   1666   O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
   1667   O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
   1668   O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
   1669   O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
   1670   O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
   1671   O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
   1672   O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
   1673 
   1674   // Emit declaration of the virtual registers or 'physical' registers for
   1675   // each register class
   1676   //for (unsigned i=0; i< numRegClasses; i++) {
   1677   //    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
   1678   //    const TargetRegisterClass *RC = TRI->getRegClass(i);
   1679   //    std::string rcname = getNVPTXRegClassName(RC);
   1680   //    std::string rcStr = getNVPTXRegClassStr(RC);
   1681   //    //int n = regmap.size();
   1682   //    if (!isNVPTXVectorRegClass(RC)) {
   1683   //      O << "\t.reg " << rcname << " \t" << rcStr << "<"
   1684   //        << NVPTXNumRegisters << ">;\n";
   1685   //    }
   1686 
   1687   // Only declare those registers that may be used. And do not emit vector
   1688   // registers as
   1689   // they are all elementized to scalar registers.
   1690   //if (n && !isNVPTXVectorRegClass(RC)) {
   1691   //    if (RegAllocNilUsed) {
   1692   //        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
   1693   //          << ">;\n";
   1694   //    }
   1695   //    else {
   1696   //        O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
   1697   //          << "<" << 32 << ">;\n";
   1698   //    }
   1699   //}
   1700   //}
   1701 
   1702   OutStreamer.EmitRawText(O.str());
   1703 }
   1704 
   1705 
   1706 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
   1707   APFloat APF = APFloat(Fp->getValueAPF());  // make a copy
   1708   bool ignored;
   1709   unsigned int numHex;
   1710   const char *lead;
   1711 
   1712   if (Fp->getType()->getTypeID()==Type::FloatTyID) {
   1713     numHex = 8;
   1714     lead = "0f";
   1715     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
   1716                 &ignored);
   1717   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
   1718     numHex = 16;
   1719     lead = "0d";
   1720     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
   1721                 &ignored);
   1722   } else
   1723     llvm_unreachable("unsupported fp type");
   1724 
   1725   APInt API = APF.bitcastToAPInt();
   1726   std::string hexstr(utohexstr(API.getZExtValue()));
   1727   O << lead;
   1728   if (hexstr.length() < numHex)
   1729     O << std::string(numHex - hexstr.length(), '0');
   1730   O << utohexstr(API.getZExtValue());
   1731 }
   1732 
   1733 void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
   1734   if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
   1735     O << CI->getValue();
   1736     return;
   1737   }
   1738   if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
   1739     printFPConstant(CFP, O);
   1740     return;
   1741   }
   1742   if (isa<ConstantPointerNull>(CPV)) {
   1743     O << "0";
   1744     return;
   1745   }
   1746   if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1747     O << *Mang->getSymbol(GVar);
   1748     return;
   1749   }
   1750   if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1751     Value *v = Cexpr->stripPointerCasts();
   1752     if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
   1753       O << *Mang->getSymbol(GVar);
   1754       return;
   1755     } else {
   1756       O << *LowerConstant(CPV, *this);
   1757       return;
   1758     }
   1759   }
   1760   llvm_unreachable("Not scalar type found in printScalarConstant()");
   1761 }
   1762 
   1763 
   1764 void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   1765                                    AggBuffer *aggBuffer) {
   1766 
   1767   const DataLayout *TD = TM.getDataLayout();
   1768 
   1769   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
   1770     int s = TD->getTypeAllocSize(CPV->getType());
   1771     if (s<Bytes)
   1772       s = Bytes;
   1773     aggBuffer->addZeros(s);
   1774     return;
   1775   }
   1776 
   1777   unsigned char *ptr;
   1778   switch (CPV->getType()->getTypeID()) {
   1779 
   1780   case Type::IntegerTyID: {
   1781     const Type *ETy = CPV->getType();
   1782     if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
   1783       unsigned char c =
   1784           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   1785       ptr = &c;
   1786       aggBuffer->addBytes(ptr, 1, Bytes);
   1787     } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
   1788       short int16 =
   1789           (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
   1790       ptr = (unsigned char*)&int16;
   1791       aggBuffer->addBytes(ptr, 2, Bytes);
   1792     } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
   1793       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1794         int int32 =(int)(constInt->getZExtValue());
   1795         ptr = (unsigned char*)&int32;
   1796         aggBuffer->addBytes(ptr, 4, Bytes);
   1797         break;
   1798       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1799         if (ConstantInt *constInt =
   1800             dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
   1801                 Cexpr, TD))) {
   1802           int int32 =(int)(constInt->getZExtValue());
   1803           ptr = (unsigned char*)&int32;
   1804           aggBuffer->addBytes(ptr, 4, Bytes);
   1805           break;
   1806         }
   1807         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1808           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1809           aggBuffer->addSymbol(v);
   1810           aggBuffer->addZeros(4);
   1811           break;
   1812         }
   1813       }
   1814       llvm_unreachable("unsupported integer const type");
   1815     } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
   1816       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
   1817         long long int64 =(long long)(constInt->getZExtValue());
   1818         ptr = (unsigned char*)&int64;
   1819         aggBuffer->addBytes(ptr, 8, Bytes);
   1820         break;
   1821       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1822         if (ConstantInt *constInt = dyn_cast<ConstantInt>(
   1823             ConstantFoldConstantExpression(Cexpr, TD))) {
   1824           long long int64 =(long long)(constInt->getZExtValue());
   1825           ptr = (unsigned char*)&int64;
   1826           aggBuffer->addBytes(ptr, 8, Bytes);
   1827           break;
   1828         }
   1829         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
   1830           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
   1831           aggBuffer->addSymbol(v);
   1832           aggBuffer->addZeros(8);
   1833           break;
   1834         }
   1835       }
   1836       llvm_unreachable("unsupported integer const type");
   1837     } else
   1838       llvm_unreachable("unsupported integer const type");
   1839     break;
   1840   }
   1841   case Type::FloatTyID:
   1842   case Type::DoubleTyID: {
   1843     ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
   1844     const Type* Ty = CFP->getType();
   1845     if (Ty == Type::getFloatTy(CPV->getContext())) {
   1846       float float32 = (float)CFP->getValueAPF().convertToFloat();
   1847       ptr = (unsigned char*)&float32;
   1848       aggBuffer->addBytes(ptr, 4, Bytes);
   1849     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
   1850       double float64 = CFP->getValueAPF().convertToDouble();
   1851       ptr = (unsigned char*)&float64;
   1852       aggBuffer->addBytes(ptr, 8, Bytes);
   1853     }
   1854     else {
   1855       llvm_unreachable("unsupported fp const type");
   1856     }
   1857     break;
   1858   }
   1859   case Type::PointerTyID: {
   1860     if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
   1861       aggBuffer->addSymbol(GVar);
   1862     }
   1863     else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
   1864       Value *v = Cexpr->stripPointerCasts();
   1865       aggBuffer->addSymbol(v);
   1866     }
   1867     unsigned int s = TD->getTypeAllocSize(CPV->getType());
   1868     aggBuffer->addZeros(s);
   1869     break;
   1870   }
   1871 
   1872   case Type::ArrayTyID:
   1873   case Type::VectorTyID:
   1874   case Type::StructTyID: {
   1875     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
   1876         isa<ConstantStruct>(CPV)) {
   1877       int ElementSize = TD->getTypeAllocSize(CPV->getType());
   1878       bufferAggregateConstant(CPV, aggBuffer);
   1879       if ( Bytes > ElementSize )
   1880         aggBuffer->addZeros(Bytes-ElementSize);
   1881     }
   1882     else if (isa<ConstantAggregateZero>(CPV))
   1883       aggBuffer->addZeros(Bytes);
   1884     else
   1885       llvm_unreachable("Unexpected Constant type");
   1886     break;
   1887   }
   1888 
   1889   default:
   1890     llvm_unreachable("unsupported type");
   1891   }
   1892 }
   1893 
   1894 void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
   1895                                               AggBuffer *aggBuffer) {
   1896   const DataLayout *TD = TM.getDataLayout();
   1897   int Bytes;
   1898 
   1899   // Old constants
   1900   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
   1901     if (CPV->getNumOperands())
   1902       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
   1903         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
   1904     return;
   1905   }
   1906 
   1907   if (const ConstantDataSequential *CDS =
   1908       dyn_cast<ConstantDataSequential>(CPV)) {
   1909     if (CDS->getNumElements())
   1910       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
   1911         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
   1912                      aggBuffer);
   1913     return;
   1914   }
   1915 
   1916 
   1917   if (isa<ConstantStruct>(CPV)) {
   1918     if (CPV->getNumOperands()) {
   1919       StructType *ST = cast<StructType>(CPV->getType());
   1920       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
   1921         if ( i == (e - 1))
   1922           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
   1923           TD->getTypeAllocSize(ST)
   1924           - TD->getStructLayout(ST)->getElementOffset(i);
   1925         else
   1926           Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
   1927           TD->getStructLayout(ST)->getElementOffset(i);
   1928         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
   1929                      aggBuffer);
   1930       }
   1931     }
   1932     return;
   1933   }
   1934   llvm_unreachable("unsupported constant type in printAggregateConstant()");
   1935 }
   1936 
   1937 // buildTypeNameMap - Run through symbol table looking for type names.
   1938 //
   1939 
   1940 
   1941 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
   1942 
   1943   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
   1944 
   1945   if (PI != TypeNameMap.end() &&
   1946       (!PI->second.compare("struct._image1d_t") ||
   1947           !PI->second.compare("struct._image2d_t") ||
   1948           !PI->second.compare("struct._image3d_t")))
   1949     return true;
   1950 
   1951   return false;
   1952 }
   1953 
   1954 /// PrintAsmOperand - Print out an operand for an inline asm expression.
   1955 ///
   1956 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   1957                                       unsigned AsmVariant,
   1958                                       const char *ExtraCode,
   1959                                       raw_ostream &O) {
   1960   if (ExtraCode && ExtraCode[0]) {
   1961     if (ExtraCode[1] != 0) return true; // Unknown modifier.
   1962 
   1963     switch (ExtraCode[0]) {
   1964     default:
   1965       // See if this is a generic print operand
   1966       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
   1967     case 'r':
   1968       break;
   1969     }
   1970   }
   1971 
   1972   printOperand(MI, OpNo, O);
   1973 
   1974   return false;
   1975 }
   1976 
   1977 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   1978                                             unsigned OpNo,
   1979                                             unsigned AsmVariant,
   1980                                             const char *ExtraCode,
   1981                                             raw_ostream &O) {
   1982   if (ExtraCode && ExtraCode[0])
   1983     return true;  // Unknown modifier
   1984 
   1985   O << '[';
   1986   printMemOperand(MI, OpNo, O);
   1987   O << ']';
   1988 
   1989   return false;
   1990 }
   1991 
   1992 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
   1993 {
   1994   switch(MI.getOpcode()) {
   1995   default:
   1996     return false;
   1997   case NVPTX::CallArgBeginInst:  case NVPTX::CallArgEndInst0:
   1998   case NVPTX::CallArgEndInst1:  case NVPTX::CallArgF32:
   1999   case NVPTX::CallArgF64:  case NVPTX::CallArgI16:
   2000   case NVPTX::CallArgI32:  case NVPTX::CallArgI32imm:
   2001   case NVPTX::CallArgI64:  case NVPTX::CallArgI8:
   2002   case NVPTX::CallArgParam:  case NVPTX::CallVoidInst:
   2003   case NVPTX::CallVoidInstReg:  case NVPTX::Callseq_End:
   2004   case NVPTX::CallVoidInstReg64:
   2005   case NVPTX::DeclareParamInst:  case NVPTX::DeclareRetMemInst:
   2006   case NVPTX::DeclareRetRegInst:  case NVPTX::DeclareRetScalarInst:
   2007   case NVPTX::DeclareScalarParamInst:  case NVPTX::DeclareScalarRegInst:
   2008   case NVPTX::StoreParamF32:  case NVPTX::StoreParamF64:
   2009   case NVPTX::StoreParamI16:  case NVPTX::StoreParamI32:
   2010   case NVPTX::StoreParamI64:  case NVPTX::StoreParamI8:
   2011   case NVPTX::StoreParamS32I8:  case NVPTX::StoreParamU32I8:
   2012   case NVPTX::StoreParamS32I16:  case NVPTX::StoreParamU32I16:
   2013   case NVPTX::StoreRetvalF32:  case NVPTX::StoreRetvalF64:
   2014   case NVPTX::StoreRetvalI16:  case NVPTX::StoreRetvalI32:
   2015   case NVPTX::StoreRetvalI64:  case NVPTX::StoreRetvalI8:
   2016   case NVPTX::LastCallArgF32:  case NVPTX::LastCallArgF64:
   2017   case NVPTX::LastCallArgI16:  case NVPTX::LastCallArgI32:
   2018   case NVPTX::LastCallArgI32imm:  case NVPTX::LastCallArgI64:
   2019   case NVPTX::LastCallArgI8:  case NVPTX::LastCallArgParam:
   2020   case NVPTX::LoadParamMemF32:  case NVPTX::LoadParamMemF64:
   2021   case NVPTX::LoadParamMemI16:  case NVPTX::LoadParamMemI32:
   2022   case NVPTX::LoadParamMemI64:  case NVPTX::LoadParamMemI8:
   2023   case NVPTX::LoadParamRegF32:  case NVPTX::LoadParamRegF64:
   2024   case NVPTX::LoadParamRegI16:  case NVPTX::LoadParamRegI32:
   2025   case NVPTX::LoadParamRegI64:  case NVPTX::LoadParamRegI8:
   2026   case NVPTX::PrototypeInst:   case NVPTX::DBG_VALUE:
   2027     return true;
   2028   }
   2029   return false;
   2030 }
   2031 
   2032 // Force static initialization.
   2033 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
   2034   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2035   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2036 }
   2037 
   2038 
   2039 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   2040   std::stringstream temp;
   2041   LineReader * reader = this->getReader(filename.str());
   2042   temp << "\n//";
   2043   temp << filename.str();
   2044   temp << ":";
   2045   temp << line;
   2046   temp << " ";
   2047   temp << reader->readLine(line);
   2048   temp << "\n";
   2049   this->OutStreamer.EmitRawText(Twine(temp.str()));
   2050 }
   2051 
   2052 
   2053 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
   2054   if (reader == NULL)  {
   2055     reader =  new LineReader(filename);
   2056   }
   2057 
   2058   if (reader->fileName() != filename) {
   2059     delete reader;
   2060     reader =  new LineReader(filename);
   2061   }
   2062 
   2063   return reader;
   2064 }
   2065 
   2066 
   2067 std::string
   2068 LineReader::readLine(unsigned lineNum) {
   2069   if (lineNum < theCurLine) {
   2070     theCurLine = 0;
   2071     fstr.seekg(0,std::ios::beg);
   2072   }
   2073   while (theCurLine < lineNum) {
   2074     fstr.getline(buff,500);
   2075     theCurLine++;
   2076   }
   2077   return buff;
   2078 }
   2079 
   2080 // Force static initialization.
   2081 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
   2082   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
   2083   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
   2084 }
   2085