Home | History | Annotate | Download | only in NVPTX
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
      2 //                     The LLVM Compiler Infrastructure
      3 //
      4 // This file is distributed under the University of Illinois Open Source
      5 // License. See LICENSE.TXT for details.
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
     10 // selection DAG.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 
     15 #include "NVPTXISelLowering.h"
     16 #include "NVPTX.h"
     17 #include "NVPTXTargetMachine.h"
     18 #include "NVPTXTargetObjectFile.h"
     19 #include "NVPTXUtilities.h"
     20 #include "llvm/CodeGen/Analysis.h"
     21 #include "llvm/CodeGen/MachineFrameInfo.h"
     22 #include "llvm/CodeGen/MachineFunction.h"
     23 #include "llvm/CodeGen/MachineInstrBuilder.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     26 #include "llvm/IR/DerivedTypes.h"
     27 #include "llvm/IR/Function.h"
     28 #include "llvm/IR/GlobalValue.h"
     29 #include "llvm/IR/IntrinsicInst.h"
     30 #include "llvm/IR/Intrinsics.h"
     31 #include "llvm/IR/Module.h"
     32 #include "llvm/MC/MCSectionELF.h"
     33 #include "llvm/Support/CallSite.h"
     34 #include "llvm/Support/CommandLine.h"
     35 #include "llvm/Support/Debug.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 #include <sstream>
     39 
     40 #undef DEBUG_TYPE
     41 #define DEBUG_TYPE "nvptx-lower"
     42 
     43 using namespace llvm;
     44 
     45 static unsigned int uniqueCallSite = 0;
     46 
     47 static cl::opt<bool>
     48 sched4reg("nvptx-sched4reg",
     49           cl::desc("NVPTX Specific: schedule for register pressue"),
     50           cl::init(false));
     51 
     52 static bool IsPTXVectorType(MVT VT) {
     53   switch (VT.SimpleTy) {
     54   default: return false;
     55   case MVT::v2i8:
     56   case MVT::v4i8:
     57   case MVT::v2i16:
     58   case MVT::v4i16:
     59   case MVT::v2i32:
     60   case MVT::v4i32:
     61   case MVT::v2i64:
     62   case MVT::v2f32:
     63   case MVT::v4f32:
     64   case MVT::v2f64:
     65   return true;
     66   }
     67 }
     68 
// NVPTXTargetLowering Constructor: registers the target's register classes
// and per-node legalization actions with TargetLowering so the DAG
// legalizer knows what PTX supports natively versus what must be expanded
// or custom-lowered.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
  nvTM(&TM),
  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy or
  // memmove (there is no libc to call into on the device).
  MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use the Source scheduling; -nvptx-sched4reg selects the
  // register-pressure scheduler instead.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  // One register class per scalar value type PTX keeps in a register.
  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,             MVT::f32, Expand);
  setOperationAction(ISD::BR_CC,             MVT::f64, Expand);
  setOperationAction(ISD::BR_CC,             MVT::i1,  Expand);
  setOperationAction(ISD::BR_CC,             MVT::i8,  Expand);
  setOperationAction(ISD::BR_CC,             MVT::i16, Expand);
  setOperationAction(ISD::BR_CC,             MVT::i32, Expand);
  setOperationAction(ISD::BR_CC,             MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);

  // Rotates are only legal when the subtarget provides the corresponding
  // PTX instructions.
  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL , MVT::i64, Legal);
    setOperationAction(ISD::ROTR , MVT::i64, Legal);
  }
  else {
    setOperationAction(ISD::ROTL , MVT::i64, Expand);
    setOperationAction(ISD::ROTR , MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL , MVT::i32, Legal);
    setOperationAction(ISD::ROTR , MVT::i32, Legal);
  }
  else {
    setOperationAction(ISD::ROTL , MVT::i32, Expand);
    setOperationAction(ISD::ROTR , MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL , MVT::i16, Expand);
  setOperationAction(ISD::ROTR , MVT::i16, Expand);
  setOperationAction(ISD::ROTL , MVT::i8, Expand);
  setOperationAction(ISD::ROTR , MVT::i8, Expand);
  setOperationAction(ISD::BSWAP , MVT::i16, Expand);
  setOperationAction(ISD::BSWAP , MVT::i32, Expand);
  setOperationAction(ISD::BSWAP , MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
  setOperationAction(ISD::BRIND,             MVT::Other, Expand);

  // Global addresses are wrapped in NVPTXISD::Wrapper (see
  // LowerGlobalAddress below).
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i64  , Custom);

  // We want to legalize constant related memmove and memcopy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  // i1 extending loads are promoted to a wider integer load instead.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP,         MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP,         MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP,               MVT::Other, Legal);

  // Register custom handling for vector loads/stores on each vector type
  // PTX can access directly (see IsPTXVectorType above).
  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Now deduce the information based on the above mentioned
  // actions
  computeRegisterProperties();
}
    192 
    193 
    194 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
    195   switch (Opcode) {
    196   default: return 0;
    197   case NVPTXISD::CALL:            return "NVPTXISD::CALL";
    198   case NVPTXISD::RET_FLAG:        return "NVPTXISD::RET_FLAG";
    199   case NVPTXISD::Wrapper:         return "NVPTXISD::Wrapper";
    200   case NVPTXISD::NVBuiltin:       return "NVPTXISD::NVBuiltin";
    201   case NVPTXISD::DeclareParam:    return "NVPTXISD::DeclareParam";
    202   case NVPTXISD::DeclareScalarParam:
    203     return "NVPTXISD::DeclareScalarParam";
    204   case NVPTXISD::DeclareRet:      return "NVPTXISD::DeclareRet";
    205   case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
    206   case NVPTXISD::PrintCall:       return "NVPTXISD::PrintCall";
    207   case NVPTXISD::LoadParam:       return "NVPTXISD::LoadParam";
    208   case NVPTXISD::StoreParam:      return "NVPTXISD::StoreParam";
    209   case NVPTXISD::StoreParamS32:   return "NVPTXISD::StoreParamS32";
    210   case NVPTXISD::StoreParamU32:   return "NVPTXISD::StoreParamU32";
    211   case NVPTXISD::MoveToParam:     return "NVPTXISD::MoveToParam";
    212   case NVPTXISD::CallArgBegin:    return "NVPTXISD::CallArgBegin";
    213   case NVPTXISD::CallArg:         return "NVPTXISD::CallArg";
    214   case NVPTXISD::LastCallArg:     return "NVPTXISD::LastCallArg";
    215   case NVPTXISD::CallArgEnd:      return "NVPTXISD::CallArgEnd";
    216   case NVPTXISD::CallVoid:        return "NVPTXISD::CallVoid";
    217   case NVPTXISD::CallVal:         return "NVPTXISD::CallVal";
    218   case NVPTXISD::CallSymbol:      return "NVPTXISD::CallSymbol";
    219   case NVPTXISD::Prototype:       return "NVPTXISD::Prototype";
    220   case NVPTXISD::MoveParam:       return "NVPTXISD::MoveParam";
    221   case NVPTXISD::MoveRetval:      return "NVPTXISD::MoveRetval";
    222   case NVPTXISD::MoveToRetval:    return "NVPTXISD::MoveToRetval";
    223   case NVPTXISD::StoreRetval:     return "NVPTXISD::StoreRetval";
    224   case NVPTXISD::PseudoUseParam:  return "NVPTXISD::PseudoUseParam";
    225   case NVPTXISD::RETURN:          return "NVPTXISD::RETURN";
    226   case NVPTXISD::CallSeqBegin:    return "NVPTXISD::CallSeqBegin";
    227   case NVPTXISD::CallSeqEnd:      return "NVPTXISD::CallSeqEnd";
    228   case NVPTXISD::LoadV2:          return "NVPTXISD::LoadV2";
    229   case NVPTXISD::LoadV4:          return "NVPTXISD::LoadV4";
    230   case NVPTXISD::LDGV2:           return "NVPTXISD::LDGV2";
    231   case NVPTXISD::LDGV4:           return "NVPTXISD::LDGV4";
    232   case NVPTXISD::LDUV2:           return "NVPTXISD::LDUV2";
    233   case NVPTXISD::LDUV4:           return "NVPTXISD::LDUV4";
    234   case NVPTXISD::StoreV2:         return "NVPTXISD::StoreV2";
    235   case NVPTXISD::StoreV4:         return "NVPTXISD::StoreV4";
    236   }
    237 }
    238 
    239 bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
    240   return VT == MVT::i1;
    241 }
    242 
    243 SDValue
    244 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
    245   DebugLoc dl = Op.getDebugLoc();
    246   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
    247   Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
    248   return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
    249 }
    250 
    251 std::string NVPTXTargetLowering::getPrototype(Type *retTy,
    252                                               const ArgListTy &Args,
    253                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
    254                                               unsigned retAlignment) const {
    255 
    256   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
    257 
    258   std::stringstream O;
    259   O << "prototype_" << uniqueCallSite << " : .callprototype ";
    260 
    261   if (retTy->getTypeID() == Type::VoidTyID)
    262     O << "()";
    263   else {
    264     O << "(";
    265     if (isABI) {
    266       if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
    267         unsigned size = 0;
    268         if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
    269           size = ITy->getBitWidth();
    270           if (size < 32) size = 32;
    271         }
    272         else {
    273           assert(retTy->isFloatingPointTy() &&
    274                  "Floating point type expected here");
    275           size = retTy->getPrimitiveSizeInBits();
    276         }
    277 
    278         O << ".param .b" << size << " _";
    279       }
    280       else if (isa<PointerType>(retTy))
    281         O << ".param .b" << getPointerTy().getSizeInBits()
    282         << " _";
    283       else {
    284         if ((retTy->getTypeID() == Type::StructTyID) ||
    285             isa<VectorType>(retTy)) {
    286           SmallVector<EVT, 16> vtparts;
    287           ComputeValueVTs(*this, retTy, vtparts);
    288           unsigned totalsz = 0;
    289           for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
    290             unsigned elems = 1;
    291             EVT elemtype = vtparts[i];
    292             if (vtparts[i].isVector()) {
    293               elems = vtparts[i].getVectorNumElements();
    294               elemtype = vtparts[i].getVectorElementType();
    295             }
    296             for (unsigned j=0, je=elems; j!=je; ++j) {
    297               unsigned sz = elemtype.getSizeInBits();
    298               if (elemtype.isInteger() && (sz < 8)) sz = 8;
    299               totalsz += sz/8;
    300             }
    301           }
    302           O << ".param .align "
    303               << retAlignment
    304               << " .b8 _["
    305               << totalsz << "]";
    306         }
    307         else {
    308           assert(false &&
    309                  "Unknown return type");
    310         }
    311       }
    312     }
    313     else {
    314       SmallVector<EVT, 16> vtparts;
    315       ComputeValueVTs(*this, retTy, vtparts);
    316       unsigned idx = 0;
    317       for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
    318         unsigned elems = 1;
    319         EVT elemtype = vtparts[i];
    320         if (vtparts[i].isVector()) {
    321           elems = vtparts[i].getVectorNumElements();
    322           elemtype = vtparts[i].getVectorElementType();
    323         }
    324 
    325         for (unsigned j=0, je=elems; j!=je; ++j) {
    326           unsigned sz = elemtype.getSizeInBits();
    327           if (elemtype.isInteger() && (sz < 32)) sz = 32;
    328           O << ".reg .b" << sz << " _";
    329           if (j<je-1) O << ", ";
    330           ++idx;
    331         }
    332         if (i < e-1)
    333           O << ", ";
    334       }
    335     }
    336     O << ") ";
    337   }
    338   O << "_ (";
    339 
    340   bool first = true;
    341   MVT thePointerTy = getPointerTy();
    342 
    343   for (unsigned i=0,e=Args.size(); i!=e; ++i) {
    344     const Type *Ty = Args[i].Ty;
    345     if (!first) {
    346       O << ", ";
    347     }
    348     first = false;
    349 
    350     if (Outs[i].Flags.isByVal() == false) {
    351       unsigned sz = 0;
    352       if (isa<IntegerType>(Ty)) {
    353         sz = cast<IntegerType>(Ty)->getBitWidth();
    354         if (sz < 32) sz = 32;
    355       }
    356       else if (isa<PointerType>(Ty))
    357         sz = thePointerTy.getSizeInBits();
    358       else
    359         sz = Ty->getPrimitiveSizeInBits();
    360       if (isABI)
    361         O << ".param .b" << sz << " ";
    362       else
    363         O << ".reg .b" << sz << " ";
    364       O << "_";
    365       continue;
    366     }
    367     const PointerType *PTy = dyn_cast<PointerType>(Ty);
    368     assert(PTy &&
    369            "Param with byval attribute should be a pointer type");
    370     Type *ETy = PTy->getElementType();
    371 
    372     if (isABI) {
    373       unsigned align = Outs[i].Flags.getByValAlign();
    374       unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    375       O << ".param .align " << align
    376           << " .b8 ";
    377       O << "_";
    378       O << "[" << sz << "]";
    379       continue;
    380     }
    381     else {
    382       SmallVector<EVT, 16> vtparts;
    383       ComputeValueVTs(*this, ETy, vtparts);
    384       for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
    385         unsigned elems = 1;
    386         EVT elemtype = vtparts[i];
    387         if (vtparts[i].isVector()) {
    388           elems = vtparts[i].getVectorNumElements();
    389           elemtype = vtparts[i].getVectorElementType();
    390         }
    391 
    392         for (unsigned j=0,je=elems; j!=je; ++j) {
    393           unsigned sz = elemtype.getSizeInBits();
    394           if (elemtype.isInteger() && (sz < 32)) sz = 32;
    395           O << ".reg .b" << sz << " ";
    396           O << "_";
    397           if (j<je-1) O << ", ";
    398         }
    399         if (i<e-1)
    400           O << ", ";
    401       }
    402       continue;
    403     }
    404   }
    405   O << ");";
    406   return O.str();
    407 }
    408 
    409 
    410 SDValue
    411 NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    412                                SmallVectorImpl<SDValue> &InVals) const {
    413   SelectionDAG &DAG                     = CLI.DAG;
    414   DebugLoc &dl                          = CLI.DL;
    415   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
    416   SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
    417   SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
    418   SDValue Chain                         = CLI.Chain;
    419   SDValue Callee                        = CLI.Callee;
    420   bool &isTailCall                      = CLI.IsTailCall;
    421   ArgListTy &Args                       = CLI.Args;
    422   Type *retTy                           = CLI.RetTy;
    423   ImmutableCallSite *CS                 = CLI.CS;
    424 
    425   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
    426 
    427   SDValue tempChain = Chain;
    428   Chain = DAG.getCALLSEQ_START(Chain,
    429                                DAG.getIntPtrConstant(uniqueCallSite, true));
    430   SDValue InFlag = Chain.getValue(1);
    431 
    432   assert((Outs.size() == Args.size()) &&
    433          "Unexpected number of arguments to function call");
    434   unsigned paramCount = 0;
    435   // Declare the .params or .reg need to pass values
    436   // to the function
    437   for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
    438     EVT VT = Outs[i].VT;
    439 
    440     if (Outs[i].Flags.isByVal() == false) {
    441       // Plain scalar
    442       // for ABI,    declare .param .b<size> .param<n>;
    443       // for nonABI, declare .reg .b<size> .param<n>;
    444       unsigned isReg = 1;
    445       if (isABI)
    446         isReg = 0;
    447       unsigned sz = VT.getSizeInBits();
    448       if (VT.isInteger() && (sz < 32)) sz = 32;
    449       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    450       SDValue DeclareParamOps[] = { Chain,
    451                                     DAG.getConstant(paramCount, MVT::i32),
    452                                     DAG.getConstant(sz, MVT::i32),
    453                                     DAG.getConstant(isReg, MVT::i32),
    454                                     InFlag };
    455       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
    456                           DeclareParamOps, 5);
    457       InFlag = Chain.getValue(1);
    458       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    459       SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
    460                              DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
    461 
    462       unsigned opcode = NVPTXISD::StoreParam;
    463       if (isReg)
    464         opcode = NVPTXISD::MoveToParam;
    465       else {
    466         if (Outs[i].Flags.isZExt())
    467           opcode = NVPTXISD::StoreParamU32;
    468         else if (Outs[i].Flags.isSExt())
    469           opcode = NVPTXISD::StoreParamS32;
    470       }
    471       Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
    472 
    473       InFlag = Chain.getValue(1);
    474       ++paramCount;
    475       continue;
    476     }
    477     // struct or vector
    478     SmallVector<EVT, 16> vtparts;
    479     const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    480     assert(PTy &&
    481            "Type of a byval parameter should be pointer");
    482     ComputeValueVTs(*this, PTy->getElementType(), vtparts);
    483 
    484     if (isABI) {
    485       // declare .param .align 16 .b8 .param<n>[<size>];
    486       unsigned sz = Outs[i].Flags.getByValSize();
    487       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    488       // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
    489       // don't need to
    490       // worry about natural alignment or not. See TargetLowering::LowerCallTo()
    491       SDValue DeclareParamOps[] = { Chain,
    492                        DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
    493                                     DAG.getConstant(paramCount, MVT::i32),
    494                                     DAG.getConstant(sz, MVT::i32),
    495                                     InFlag };
    496       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
    497                           DeclareParamOps, 5);
    498       InFlag = Chain.getValue(1);
    499       unsigned curOffset = 0;
    500       for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
    501         unsigned elems = 1;
    502         EVT elemtype = vtparts[j];
    503         if (vtparts[j].isVector()) {
    504           elems = vtparts[j].getVectorNumElements();
    505           elemtype = vtparts[j].getVectorElementType();
    506         }
    507         for (unsigned k=0,ke=elems; k!=ke; ++k) {
    508           unsigned sz = elemtype.getSizeInBits();
    509           if (elemtype.isInteger() && (sz < 8)) sz = 8;
    510           SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
    511                                         OutVals[i],
    512                                         DAG.getConstant(curOffset,
    513                                                         getPointerTy()));
    514           SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
    515                                 MachinePointerInfo(), false, false, false, 0);
    516           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    517           SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
    518                                                             MVT::i32),
    519                                            DAG.getConstant(curOffset, MVT::i32),
    520                                                             theVal, InFlag };
    521           Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
    522                               CopyParamOps, 5);
    523           InFlag = Chain.getValue(1);
    524           curOffset += sz/8;
    525         }
    526       }
    527       ++paramCount;
    528       continue;
    529     }
    530     // Non-abi, struct or vector
    531     // Declare a bunch or .reg .b<size> .param<n>
    532     unsigned curOffset = 0;
    533     for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
    534       unsigned elems = 1;
    535       EVT elemtype = vtparts[j];
    536       if (vtparts[j].isVector()) {
    537         elems = vtparts[j].getVectorNumElements();
    538         elemtype = vtparts[j].getVectorElementType();
    539       }
    540       for (unsigned k=0,ke=elems; k!=ke; ++k) {
    541         unsigned sz = elemtype.getSizeInBits();
    542         if (elemtype.isInteger() && (sz < 32)) sz = 32;
    543         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    544         SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
    545                                                              MVT::i32),
    546                                                   DAG.getConstant(sz, MVT::i32),
    547                                                    DAG.getConstant(1, MVT::i32),
    548                                                              InFlag };
    549         Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
    550                             DeclareParamOps, 5);
    551         InFlag = Chain.getValue(1);
    552         SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
    553                                       DAG.getConstant(curOffset,
    554                                                       getPointerTy()));
    555         SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
    556                                   MachinePointerInfo(), false, false, false, 0);
    557         SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    558         SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
    559                                    DAG.getConstant(0, MVT::i32), theVal,
    560                                    InFlag };
    561         Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
    562                             CopyParamOps, 5);
    563         InFlag = Chain.getValue(1);
    564         ++paramCount;
    565       }
    566     }
    567   }
    568 
    569   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
    570   unsigned retAlignment = 0;
    571 
    572   // Handle Result
    573   unsigned retCount = 0;
    574   if (Ins.size() > 0) {
    575     SmallVector<EVT, 16> resvtparts;
    576     ComputeValueVTs(*this, retTy, resvtparts);
    577 
    578     // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
    579     // individual .reg .b<size> func_retval<0..> for non ABI
    580     unsigned resultsz = 0;
    581     for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
    582       unsigned elems = 1;
    583       EVT elemtype = resvtparts[i];
    584       if (resvtparts[i].isVector()) {
    585         elems = resvtparts[i].getVectorNumElements();
    586         elemtype = resvtparts[i].getVectorElementType();
    587       }
    588       for (unsigned j=0,je=elems; j!=je; ++j) {
    589         unsigned sz = elemtype.getSizeInBits();
    590         if (isABI == false) {
    591           if (elemtype.isInteger() && (sz < 32)) sz = 32;
    592         }
    593         else {
    594           if (elemtype.isInteger() && (sz < 8)) sz = 8;
    595         }
    596         if (isABI == false) {
    597           SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    598           SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
    599                                       DAG.getConstant(sz, MVT::i32),
    600                                       DAG.getConstant(retCount, MVT::i32),
    601                                       InFlag };
    602           Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
    603                               DeclareRetOps, 5);
    604           InFlag = Chain.getValue(1);
    605           ++retCount;
    606         }
    607         resultsz += sz;
    608       }
    609     }
    610     if (isABI) {
    611       if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
    612           retTy->isPointerTy() ) {
    613         // Scalar needs to be at least 32bit wide
    614         if (resultsz < 32)
    615           resultsz = 32;
    616         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    617         SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
    618                                     DAG.getConstant(resultsz, MVT::i32),
    619                                     DAG.getConstant(0, MVT::i32), InFlag };
    620         Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
    621                             DeclareRetOps, 5);
    622         InFlag = Chain.getValue(1);
    623       }
    624       else {
    625         if (Func) { // direct call
    626           if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
    627             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
    628         } else { // indirect call
    629           const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
    630           if (!llvm::getAlign(*CallI, 0, retAlignment))
    631             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
    632         }
    633         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    634         SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
    635                                                            MVT::i32),
    636                                           DAG.getConstant(resultsz/8, MVT::i32),
    637                                          DAG.getConstant(0, MVT::i32), InFlag };
    638         Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
    639                             DeclareRetOps, 5);
    640         InFlag = Chain.getValue(1);
    641       }
    642     }
    643   }
    644 
    645   if (!Func) {
    646     // This is indirect function call case : PTX requires a prototype of the
    647     // form
    648     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    649     // to be emitted, and the label has to used as the last arg of call
    650     // instruction.
    651     // The prototype is embedded in a string and put as the operand for an
    652     // INLINEASM SDNode.
    653     SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    654     std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    655     const char *asmstr = nvTM->getManagedStrPool()->
    656         getManagedString(proto_string.c_str())->c_str();
    657     SDValue InlineAsmOps[] = { Chain,
    658                                DAG.getTargetExternalSymbol(asmstr,
    659                                                            getPointerTy()),
    660                                                            DAG.getMDNode(0),
    661                                    DAG.getTargetConstant(0, MVT::i32), InFlag };
    662     Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    663     InFlag = Chain.getValue(1);
    664   }
    665   // Op to just print "call"
    666   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    667   SDValue PrintCallOps[] = { Chain,
    668                              DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
    669                                  : retCount, MVT::i32),
    670                                    InFlag };
    671   Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
    672       PrintCallVTs, PrintCallOps, 3);
    673   InFlag = Chain.getValue(1);
    674 
    675   // Ops to print out the function name
    676   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    677   SDValue CallVoidOps[] = { Chain, Callee, InFlag };
    678   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
    679   InFlag = Chain.getValue(1);
    680 
    681   // Ops to print out the param list
    682   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    683   SDValue CallArgBeginOps[] = { Chain, InFlag };
    684   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
    685                       CallArgBeginOps, 2);
    686   InFlag = Chain.getValue(1);
    687 
    688   for (unsigned i=0, e=paramCount; i!=e; ++i) {
    689     unsigned opcode;
    690     if (i==(e-1))
    691       opcode = NVPTXISD::LastCallArg;
    692     else
    693       opcode = NVPTXISD::CallArg;
    694     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    695     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
    696                              DAG.getConstant(i, MVT::i32),
    697                              InFlag };
    698     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    699     InFlag = Chain.getValue(1);
    700   }
    701   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    702   SDValue CallArgEndOps[] = { Chain,
    703                               DAG.getConstant(Func ? 1 : 0, MVT::i32),
    704                               InFlag };
    705   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
    706                       3);
    707   InFlag = Chain.getValue(1);
    708 
    709   if (!Func) {
    710     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    711     SDValue PrototypeOps[] = { Chain,
    712                                DAG.getConstant(uniqueCallSite, MVT::i32),
    713                                InFlag };
    714     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    715     InFlag = Chain.getValue(1);
    716   }
    717 
    718   // Generate loads from param memory/moves from registers for result
    719   if (Ins.size() > 0) {
    720     if (isABI) {
    721       unsigned resoffset = 0;
    722       for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
    723         unsigned sz = Ins[i].VT.getSizeInBits();
    724         if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
    725         EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
    726         SDValue LoadRetOps[] = {
    727           Chain,
    728           DAG.getConstant(1, MVT::i32),
    729           DAG.getConstant(resoffset, MVT::i32),
    730           InFlag
    731         };
    732         SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
    733                                      LoadRetOps, array_lengthof(LoadRetOps));
    734         Chain = retval.getValue(1);
    735         InFlag = retval.getValue(2);
    736         InVals.push_back(retval);
    737         resoffset += sz/8;
    738       }
    739     }
    740     else {
    741       SmallVector<EVT, 16> resvtparts;
    742       ComputeValueVTs(*this, retTy, resvtparts);
    743 
    744       assert(Ins.size() == resvtparts.size() &&
    745              "Unexpected number of return values in non-ABI case");
    746       unsigned paramNum = 0;
    747       for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
    748         assert(EVT(Ins[i].VT) == resvtparts[i] &&
    749                "Unexpected EVT type in non-ABI case");
    750         unsigned numelems = 1;
    751         EVT elemtype = Ins[i].VT;
    752         if (Ins[i].VT.isVector()) {
    753           numelems = Ins[i].VT.getVectorNumElements();
    754           elemtype = Ins[i].VT.getVectorElementType();
    755         }
    756         std::vector<SDValue> tempRetVals;
    757         for (unsigned j=0; j<numelems; ++j) {
    758           EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
    759           SDValue MoveRetOps[] = {
    760             Chain,
    761             DAG.getConstant(0, MVT::i32),
    762             DAG.getConstant(paramNum, MVT::i32),
    763             InFlag
    764           };
    765           SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
    766                                        MoveRetOps, array_lengthof(MoveRetOps));
    767           Chain = retval.getValue(1);
    768           InFlag = retval.getValue(2);
    769           tempRetVals.push_back(retval);
    770           ++paramNum;
    771         }
    772         if (Ins[i].VT.isVector())
    773           InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
    774                                        &tempRetVals[0], tempRetVals.size()));
    775         else
    776           InVals.push_back(tempRetVals[0]);
    777       }
    778     }
    779   }
    780   Chain = DAG.getCALLSEQ_END(Chain,
    781                              DAG.getIntPtrConstant(uniqueCallSite, true),
    782                              DAG.getIntPtrConstant(uniqueCallSite+1, true),
    783                              InFlag);
    784   uniqueCallSite++;
    785 
    786   // set isTailCall to false for now, until we figure out how to express
    787   // tail call optimization in PTX
    788   isTailCall = false;
    789   return Chain;
    790 }
    791 
    792 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
    793 // (see LegalizeDAG.cpp). This is slow and uses local memory.
    794 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
    795 SDValue NVPTXTargetLowering::
    796 LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
    797   SDNode *Node = Op.getNode();
    798   DebugLoc dl = Node->getDebugLoc();
    799   SmallVector<SDValue, 8> Ops;
    800   unsigned NumOperands = Node->getNumOperands();
    801   for (unsigned i=0; i < NumOperands; ++i) {
    802     SDValue SubOp = Node->getOperand(i);
    803     EVT VVT = SubOp.getNode()->getValueType(0);
    804     EVT EltVT = VVT.getVectorElementType();
    805     unsigned NumSubElem = VVT.getVectorNumElements();
    806     for (unsigned j=0; j < NumSubElem; ++j) {
    807       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
    808                                 DAG.getIntPtrConstant(j)));
    809     }
    810   }
    811   return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
    812                      &Ops[0], Ops.size());
    813 }
    814 
    815 SDValue NVPTXTargetLowering::
    816 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    817   switch (Op.getOpcode()) {
    818   case ISD::RETURNADDR: return SDValue();
    819   case ISD::FRAMEADDR:  return SDValue();
    820   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
    821   case ISD::INTRINSIC_W_CHAIN: return Op;
    822   case ISD::BUILD_VECTOR:
    823   case ISD::EXTRACT_SUBVECTOR:
    824     return Op;
    825   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
    826   case ISD::STORE: return LowerSTORE(Op, DAG);
    827   case ISD::LOAD: return LowerLOAD(Op, DAG);
    828   default:
    829     llvm_unreachable("Custom lowering not defined for operation");
    830   }
    831 }
    832 
    833 
    834 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    835   if (Op.getValueType() == MVT::i1)
    836     return LowerLOADi1(Op, DAG);
    837   else
    838     return SDValue();
    839 }
    840 
// Lower an i1 load by loading a full byte and truncating:
//
// v = ld i1* addr
//   =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  DebugLoc dl = Node->getDebugLoc();
  // Only plain (non-extending) i1 loads are expected here.
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  // Re-issue the load at i8, carrying over all memory attributes of the
  // original load (volatility, temporality, invariance, alignment).
  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(),
                              LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  // NOTE(review): the chain merged here is the *original* load's input
  // chain rather than newLD's output chain — confirm the new load cannot
  // be dropped or reordered because of this.
  SDValue Ops[] = {result, LD->getChain()};
  return DAG.getMergeValues(Ops, 2, dl);
}
    865 
    866 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    867   EVT ValVT = Op.getOperand(1).getValueType();
    868   if (ValVT == MVT::i1)
    869     return LowerSTOREi1(Op, DAG);
    870   else if (ValVT.isVector())
    871     return LowerSTOREVector(Op, DAG);
    872   else
    873     return SDValue();
    874 }
    875 
// Lower a vector store into a single NVPTXISD::StoreV2/StoreV4 target
// node whose operands are the extracted scalar elements, so the backend
// can emit a vectorized st instruction instead of a stack expansion.
// Returns an empty SDValue (default handling) for unsupported types.
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1); // the stored value
  DebugLoc DL = N->getDebugLoc();
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal.  We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default: return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    // Pick the target opcode from the element count; only 2- and
    // 4-element vectors are supported.
    switch (NumElts) {
    default:  return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i));
      if (NeedExt)
        // ANY_EXTEND is correct here since the store will only look at the
        // lower-order bits anyway.
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
      Ops.push_back(N->getOperand(i));
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);

    // Build the target memory-intrinsic node; the original vector type is
    // kept as the memory VT even when the operands were widened to i16.
    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
                                            DAG.getVTList(MVT::Other), &Ops[0],
                                            Ops.size(), MemSD->getMemoryVT(),
                                            MemSD->getMemOperand());


    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}
    962 
    963 // st i1 v, addr
    964 //    =>
    965 // v1 = zxt v to i8
    966 // st i8, addr
    967 SDValue NVPTXTargetLowering::
    968 LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
    969   SDNode *Node = Op.getNode();
    970   DebugLoc dl = Node->getDebugLoc();
    971   StoreSDNode *ST = cast<StoreSDNode>(Node);
    972   SDValue Tmp1 = ST->getChain();
    973   SDValue Tmp2 = ST->getBasePtr();
    974   SDValue Tmp3 = ST->getValue();
    975   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
    976   unsigned Alignment = ST->getAlignment();
    977   bool isVolatile = ST->isVolatile();
    978   bool isNonTemporal = ST->isNonTemporal();
    979   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
    980                      MVT::i8, Tmp3);
    981   SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
    982                                 ST->getPointerInfo(), isVolatile,
    983                                 isNonTemporal, Alignment);
    984   return Result;
    985 }
    986 
    987 
    988 SDValue
    989 NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
    990                                 EVT v) const {
    991   std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
    992   std::stringstream suffix;
    993   suffix << idx;
    994   *name += suffix.str();
    995   return DAG.getTargetExternalSymbol(name->c_str(), v);
    996 }
    997 
// Returns the external symbol ".PARAM<idx>" used to refer to formal
// parameter number 'idx' (see getExtSymb for the name construction).
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  return getExtSymb(DAG, ".PARAM", idx, v);
}
   1002 
// Returns the external symbol ".HLPPARAM<idx>" (helper-parameter symbol)
// for parameter number 'idx'; the EVT defaults per getExtSymb's signature.
SDValue
NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}
   1007 
   1008 // Check to see if the kernel argument is image*_t or sampler_t
   1009 
   1010 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
   1011   static const char *const specialTypes[] = {
   1012                                              "struct._image2d_t",
   1013                                              "struct._image3d_t",
   1014                                              "struct._sampler_t"
   1015   };
   1016 
   1017   const Type *Ty = arg->getType();
   1018   const PointerType *PTy = dyn_cast<PointerType>(Ty);
   1019 
   1020   if (!PTy)
   1021     return false;
   1022 
   1023   if (!context)
   1024     return false;
   1025 
   1026   const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
   1027   const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
   1028 
   1029   for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
   1030     if (TypeName == specialTypes[i])
   1031       return true;
   1032 
   1033   return false;
   1034 }
   1035 
// Lower incoming formal arguments: for each IR argument produce the SDValue
// the function body will use, pushed onto InVals in argument order.
// Handling depends on argument kind:
//  - image/sampler kernel params become an i32 constant (their position),
//  - dead args become UNDEF,
//  - plain scalars are loaded from their .PARAM symbol (ABI/kernel) or
//    moved via NVPTXISD::MoveParam (non-ABI),
//  - byval params yield a MoveParam'd pointer (ABI/kernel) or a local
//    stack copy assembled piecewise (non-ABI device functions).
SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                          DebugLoc dl, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  // NOTE(review): OutChains is never appended to anywhere below, so the
  // setRoot at the end is currently dead — confirm this is intentional.
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  // The ABI path is used on sm_20 and newer.
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  // Collect the IR-level arguments and their types so they can be walked
  // in lock-step with Ins.
  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
      I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  // 'idx' numbers the emitted param symbols; it can advance faster than 'i'
  // in the non-ABI byval case, where one IR argument expands to several
  // param symbols.
  int idx = 0;
  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                           (theArgs[i]->getParent() ?
                               theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
      // A plain scalar.
      if (isABI || isKernel) {
        // If ABI, load from the param symbol
        SDValue Arg = getParamSymbol(DAG, idx);
        // Conjure up a value that we can get the address space from.
        // FIXME: Using a constant here is a hack.
        Value *srcValue = Constant::getNullValue(PointerType::get(
                              ObjectVT.getTypeForEVT(F->getContext()),
                              llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
                                  F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      else {
        // If no ABI, just move the param symbol
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      continue;
    }

    // Param has ByVal attribute
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol can be returned directly,
      // but when SDNode builder decides to use it in a CopyToReg(),
      // machine instruction fails because TargetExternalSymbol
      // (not lowered) is target dependent, and CopyToReg assumes
      // the source is lowered.
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx+1);
      if (isKernel)
        InVals.push_back(p);
      else {
        // Device functions receive a generic-address-space pointer:
        // convert the param pointer with nvvm_ptr_local_to_gen.
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers and
      // store them locally and return the local pointer in InVals
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType &&
             "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      // Stack slot large enough for the whole aggregate, 16-byte aligned.
      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
                                      CreateStackObject(totalsize/8, 16, false),
                                             getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      // Move each constituent scalar from its own param symbol into the
      // stack slot; 'idx' advances once per emitted param symbol.
      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                    DAG.getConstant(sizesofar, getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                        MachinePointerInfo(), false, false, 0));
          sizesofar += tmpvt.getStoreSizeInBits()/8;
          ++idx;
        }
      }
      // The loop overshoots by one; compensate before the outer ++idx.
      --idx;
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass.
  // We treat this case as if the arg list is empty.
  //if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}
   1198 
// Lower a return: emit one StoreRetval (ABI, keyed by byte offset) or
// MoveToRetval (non-ABI, keyed by return-value index) per scalar element
// of each returned value, then terminate with NVPTXISD::RET_FLAG.
// Vector returns are decomposed element by element.
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {

  // The ABI path is used on sm_20 and newer.
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  // 'sizesofar' is the running byte offset (ABI); 'idx' is the running
  // scalar index (non-ABI). Both advance once per emitted scalar.
  unsigned sizesofar = 0;
  unsigned idx = 0;
  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
    for (unsigned j=0,je=numElems; j!=je; ++j) {
      SDValue tmpval = theVal;
      // For vectors, peel off element j; scalars pass through unchanged.
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(),
                             tmpval, DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
          dl, MVT::Other,
          Chain,
          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
          tmpval);
      // Advance the byte offset by the store size of the element just
      // emitted (element type for vectors, full type for scalars).
      if (theValType.isVector())
        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
      else
        sizesofar += theValType.getStoreSizeInBits()/8;
      ++idx;
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
   1236 
   1237 void
   1238 NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   1239                                                   std::string &Constraint,
   1240                                                   std::vector<SDValue> &Ops,
   1241                                                   SelectionDAG &DAG) const
   1242 {
   1243   if (Constraint.length() > 1)
   1244     return;
   1245   else
   1246     TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
   1247 }
   1248 
   1249 // NVPTX suuport vector of legal types of any length in Intrinsics because the
   1250 // NVPTX specific type legalizer
   1251 // will legalize them to the PTX supported length.
   1252 bool
   1253 NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
   1254   if (isTypeLegal(VT))
   1255     return true;
   1256   if (VT.isVector()) {
   1257     MVT eVT = VT.getVectorElementType();
   1258     if (isTypeLegal(eVT))
   1259       return true;
   1260   }
   1261   return false;
   1262 }
   1263 
   1264 
   1265 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
   1266 // TgtMemIntrinsic
   1267 // because we need the information that is only available in the "Value" type
   1268 // of destination
   1269 // pointer. In particular, the address space information.
   1270 bool
   1271 NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
   1272                                         unsigned Intrinsic) const {
   1273   switch (Intrinsic) {
   1274   default:
   1275     return false;
   1276 
   1277   case Intrinsic::nvvm_atomic_load_add_f32:
   1278     Info.opc = ISD::INTRINSIC_W_CHAIN;
   1279     Info.memVT = MVT::f32;
   1280     Info.ptrVal = I.getArgOperand(0);
   1281     Info.offset = 0;
   1282     Info.vol = 0;
   1283     Info.readMem = true;
   1284     Info.writeMem = true;
   1285     Info.align = 0;
   1286     return true;
   1287 
   1288   case Intrinsic::nvvm_atomic_load_inc_32:
   1289   case Intrinsic::nvvm_atomic_load_dec_32:
   1290     Info.opc = ISD::INTRINSIC_W_CHAIN;
   1291     Info.memVT = MVT::i32;
   1292     Info.ptrVal = I.getArgOperand(0);
   1293     Info.offset = 0;
   1294     Info.vol = 0;
   1295     Info.readMem = true;
   1296     Info.writeMem = true;
   1297     Info.align = 0;
   1298     return true;
   1299 
   1300   case Intrinsic::nvvm_ldu_global_i:
   1301   case Intrinsic::nvvm_ldu_global_f:
   1302   case Intrinsic::nvvm_ldu_global_p:
   1303 
   1304     Info.opc = ISD::INTRINSIC_W_CHAIN;
   1305     if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
   1306       Info.memVT = MVT::i32;
   1307     else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
   1308       Info.memVT = getPointerTy();
   1309     else
   1310       Info.memVT = MVT::f32;
   1311     Info.ptrVal = I.getArgOperand(0);
   1312     Info.offset = 0;
   1313     Info.vol = 0;
   1314     Info.readMem = true;
   1315     Info.writeMem = false;
   1316     Info.align = 0;
   1317     return true;
   1318 
   1319   }
   1320   return false;
   1321 }
   1322 
   1323 /// isLegalAddressingMode - Return true if the addressing mode represented
   1324 /// by AM is legal for this target, for a load/store of the specified type.
   1325 /// Used to guide target specific optimizations, like loop strength reduction
   1326 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
   1327 /// (CodeGenPrepare.cpp)
   1328 bool
   1329 NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   1330                                            Type *Ty) const {
   1331 
   1332   // AddrMode - This represents an addressing mode of:
   1333   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   1334   //
   1335   // The legal address modes are
   1336   // - [avar]
   1337   // - [areg]
   1338   // - [areg+immoff]
   1339   // - [immAddr]
   1340 
   1341   if (AM.BaseGV) {
   1342     if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
   1343       return false;
   1344     return true;
   1345   }
   1346 
   1347   switch (AM.Scale) {
   1348   case 0:  // "r", "r+i" or "i" is allowed
   1349     break;
   1350   case 1:
   1351     if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
   1352       return false;
   1353     // Otherwise we have r+i.
   1354     break;
   1355   default:
   1356     // No scale > 1 is allowed
   1357     return false;
   1358   }
   1359   return true;
   1360 }
   1361 
   1362 //===----------------------------------------------------------------------===//
   1363 //                         NVPTX Inline Assembly Support
   1364 //===----------------------------------------------------------------------===//
   1365 
   1366 /// getConstraintType - Given a constraint letter, return the type of
   1367 /// constraint it is for this target.
   1368 NVPTXTargetLowering::ConstraintType
   1369 NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
   1370   if (Constraint.size() == 1) {
   1371     switch (Constraint[0]) {
   1372     default:
   1373       break;
   1374     case 'r':
   1375     case 'h':
   1376     case 'c':
   1377     case 'l':
   1378     case 'f':
   1379     case 'd':
   1380     case '0':
   1381     case 'N':
   1382       return C_RegisterClass;
   1383     }
   1384   }
   1385   return TargetLowering::getConstraintType(Constraint);
   1386 }
   1387 
   1388 
   1389 std::pair<unsigned, const TargetRegisterClass*>
   1390 NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   1391                                                   EVT VT) const {
   1392   if (Constraint.size() == 1) {
   1393     switch (Constraint[0]) {
   1394     case 'c':
   1395       return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
   1396     case 'h':
   1397       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
   1398     case 'r':
   1399       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
   1400     case 'l':
   1401     case 'N':
   1402       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
   1403     case 'f':
   1404       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
   1405     case 'd':
   1406       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
   1407     }
   1408   }
   1409   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
   1410 }
   1411 
   1412 
   1413 
/// getFunctionAlignment - Return the Log2 alignment of this function.
/// Always 4, i.e. functions are aligned to 2^4 = 16 bytes.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4;
}
   1418 
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
/// Replaces an ISD::LOAD of a "native" vector type with a single
/// NVPTXISD::LoadV2/LoadV4 memory-intrinsic node that yields one scalar
/// result per element, then re-packs the scalars with BUILD_VECTOR.  On
/// success, pushes the rebuilt vector and the new load chain into Results;
/// on unsupported types it returns without touching Results.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue>& Results) {
  EVT ResVT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal.  We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default: return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  // Pick the target opcode and the result-type list: one scalar result per
  // element, plus a trailing MVT::Other for the chain.
  switch (NumElts) {
  default:  return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs, 5);
    break;
  }
  }

  SmallVector<SDValue, 8> OtherOps;

  // Copy regular operands
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    OtherOps.push_back(N->getOperand(i));

  LoadSDNode *LD = cast<LoadSDNode>(N);

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  // Build the replacement node, keeping the original memory type and
  // MachineMemOperand so isel still sees the true in-memory element type.
  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
                                          OtherOps.size(), LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  // Collect the scalar results, truncating back to the original narrow
  // element type if the load was widened to i16 above.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  // The chain is the result value immediately after the NumElts scalars.
  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}
   1508 
/// ReplaceINTRINSIC_W_CHAIN - Custom-legalize the nvvm ldg/ldu load
/// intrinsics.  Vector results become NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4
/// target nodes (one scalar result per element plus a chain); scalar i8
/// results are widened to i16 while keeping an i8 memory type for isel.
/// Unhandled intrinsics/types fall through without touching Results.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch(IntrinNo) {
  default: return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
      // loaded type to i16 and propagate the "real" type as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      // Select the V2/V4 opcode by element count, then LDG vs. LDU by
      // intrinsic ID; also build the per-element result-type list with a
      // trailing MVT::Other for the chain.
      switch (NumElts) {
      default:  return;
      case 2:
        switch(IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch(IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs, 5);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // Preserve the original memory type and MachineMemOperand so isel
      // still sees the true in-memory element type.
      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
                                              OtherOps.size(), MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      // Collect the scalar results, truncating back to the original narrow
      // element type if the load was widened to i16 above.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      // The chain is the result value immediately after the NumElts scalars.
      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
                                              LdResVTs, &Ops[0],
                                              Ops.size(), MVT::i8,
                                              MemSD->getMemOperand());

      Results.push_back(NewLD.getValue(0));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}
   1643 
   1644 void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
   1645                                              SmallVectorImpl<SDValue> &Results,
   1646                                              SelectionDAG &DAG) const {
   1647   switch (N->getOpcode()) {
   1648   default: report_fatal_error("Unhandled custom legalization");
   1649   case ISD::LOAD:
   1650     ReplaceLoadVector(N, DAG, Results);
   1651     return;
   1652   case ISD::INTRINSIC_W_CHAIN:
   1653     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
   1654     return;
   1655   }
   1656 }
   1657