Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "NVPTXISelDAGToDAG.h"
     15 #include "llvm/IR/GlobalValue.h"
     16 #include "llvm/IR/Instructions.h"
     17 #include "llvm/Support/CommandLine.h"
     18 #include "llvm/Support/Debug.h"
     19 #include "llvm/Support/ErrorHandling.h"
     20 #include "llvm/Support/raw_ostream.h"
     21 #include "llvm/Target/TargetIntrinsicInfo.h"
     22 
     23 using namespace llvm;
     24 
     25 #define DEBUG_TYPE "nvptx-isel"
     26 
     27 unsigned FMAContractLevel = 0;
     28 
     29 static cl::opt<unsigned, true>
     30 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
     31                     cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
     32                              " 1: do it  2: do it aggressively"),
     33                     cl::location(FMAContractLevel),
     34                     cl::init(2));
     35 
     36 static cl::opt<int> UsePrecDivF32(
     37     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
     38     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     39              " IEEE Compliant F32 div.rnd if avaiable."),
     40     cl::init(2));
     41 
     42 static cl::opt<bool>
     43 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
     44           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
     45           cl::init(true));
     46 
     47 static cl::opt<bool>
     48 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
     49            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
     50            cl::init(false));
     51 
     52 
     53 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     54 /// NVPTX-specific DAG, ready for instruction scheduling.
     55 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     56                                        llvm::CodeGenOpt::Level OptLevel) {
     57   return new NVPTXDAGToDAGISel(TM, OptLevel);
     58 }
     59 
     60 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     61                                      CodeGenOpt::Level OptLevel)
     62     : SelectionDAGISel(tm, OptLevel),
     63       Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
     64 
     65   doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
     66   doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
     67   doFMAF32AGG =
     68       (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
     69   doFMAF64AGG =
     70       (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
     71 
     72   allowFMA = (FMAContractLevel >= 1);
     73 
     74   doMulWide = (OptLevel > 0);
     75 }
     76 
     77 int NVPTXDAGToDAGISel::getDivF32Level() const {
     78   if (UsePrecDivF32.getNumOccurrences() > 0) {
     79     // If nvptx-prec-div32=N is used on the command-line, always honor it
     80     return UsePrecDivF32;
     81   } else {
     82     // Otherwise, use div.approx if fast math is enabled
     83     if (TM.Options.UnsafeFPMath)
     84       return 0;
     85     else
     86       return 2;
     87   }
     88 }
     89 
     90 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
     91   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
     92     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
     93     return UsePrecSqrtF32;
     94   } else {
     95     // Otherwise, use sqrt.approx if fast math is enabled
     96     if (TM.Options.UnsafeFPMath)
     97       return false;
     98     else
     99       return true;
    100   }
    101 }
    102 
    103 bool NVPTXDAGToDAGISel::useF32FTZ() const {
    104   if (FtzEnabled.getNumOccurrences() > 0) {
    105     // If nvptx-f32ftz is used on the command-line, always honor it
    106     return FtzEnabled;
    107   } else {
    108     const Function *F = MF->getFunction();
    109     // Otherwise, check for an nvptx-f32ftz attribute on the function
    110     if (F->hasFnAttribute("nvptx-f32ftz"))
    111       return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
    112                                               "nvptx-f32ftz")
    113                                               .getValueAsString() == "true");
    114     else
    115       return false;
    116   }
    117 }
    118 
    119 /// Select - Select instructions not customized! Used for
    120 /// expanded, promoted and normal instructions.
    121 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
    122 
    123   if (N->isMachineOpcode()) {
    124     N->setNodeId(-1);
    125     return nullptr; // Already selected.
    126   }
    127 
    128   SDNode *ResNode = nullptr;
    129   switch (N->getOpcode()) {
    130   case ISD::LOAD:
    131     ResNode = SelectLoad(N);
    132     break;
    133   case ISD::STORE:
    134     ResNode = SelectStore(N);
    135     break;
    136   case NVPTXISD::LoadV2:
    137   case NVPTXISD::LoadV4:
    138     ResNode = SelectLoadVector(N);
    139     break;
    140   case NVPTXISD::LDGV2:
    141   case NVPTXISD::LDGV4:
    142   case NVPTXISD::LDUV2:
    143   case NVPTXISD::LDUV4:
    144     ResNode = SelectLDGLDU(N);
    145     break;
    146   case NVPTXISD::StoreV2:
    147   case NVPTXISD::StoreV4:
    148     ResNode = SelectStoreVector(N);
    149     break;
    150   case NVPTXISD::LoadParam:
    151   case NVPTXISD::LoadParamV2:
    152   case NVPTXISD::LoadParamV4:
    153     ResNode = SelectLoadParam(N);
    154     break;
    155   case NVPTXISD::StoreRetval:
    156   case NVPTXISD::StoreRetvalV2:
    157   case NVPTXISD::StoreRetvalV4:
    158     ResNode = SelectStoreRetval(N);
    159     break;
    160   case NVPTXISD::StoreParam:
    161   case NVPTXISD::StoreParamV2:
    162   case NVPTXISD::StoreParamV4:
    163   case NVPTXISD::StoreParamS32:
    164   case NVPTXISD::StoreParamU32:
    165     ResNode = SelectStoreParam(N);
    166     break;
    167   case ISD::INTRINSIC_WO_CHAIN:
    168     ResNode = SelectIntrinsicNoChain(N);
    169     break;
    170   case ISD::INTRINSIC_W_CHAIN:
    171     ResNode = SelectIntrinsicChain(N);
    172     break;
    173   case NVPTXISD::Tex1DFloatI32:
    174   case NVPTXISD::Tex1DFloatFloat:
    175   case NVPTXISD::Tex1DFloatFloatLevel:
    176   case NVPTXISD::Tex1DFloatFloatGrad:
    177   case NVPTXISD::Tex1DI32I32:
    178   case NVPTXISD::Tex1DI32Float:
    179   case NVPTXISD::Tex1DI32FloatLevel:
    180   case NVPTXISD::Tex1DI32FloatGrad:
    181   case NVPTXISD::Tex1DArrayFloatI32:
    182   case NVPTXISD::Tex1DArrayFloatFloat:
    183   case NVPTXISD::Tex1DArrayFloatFloatLevel:
    184   case NVPTXISD::Tex1DArrayFloatFloatGrad:
    185   case NVPTXISD::Tex1DArrayI32I32:
    186   case NVPTXISD::Tex1DArrayI32Float:
    187   case NVPTXISD::Tex1DArrayI32FloatLevel:
    188   case NVPTXISD::Tex1DArrayI32FloatGrad:
    189   case NVPTXISD::Tex2DFloatI32:
    190   case NVPTXISD::Tex2DFloatFloat:
    191   case NVPTXISD::Tex2DFloatFloatLevel:
    192   case NVPTXISD::Tex2DFloatFloatGrad:
    193   case NVPTXISD::Tex2DI32I32:
    194   case NVPTXISD::Tex2DI32Float:
    195   case NVPTXISD::Tex2DI32FloatLevel:
    196   case NVPTXISD::Tex2DI32FloatGrad:
    197   case NVPTXISD::Tex2DArrayFloatI32:
    198   case NVPTXISD::Tex2DArrayFloatFloat:
    199   case NVPTXISD::Tex2DArrayFloatFloatLevel:
    200   case NVPTXISD::Tex2DArrayFloatFloatGrad:
    201   case NVPTXISD::Tex2DArrayI32I32:
    202   case NVPTXISD::Tex2DArrayI32Float:
    203   case NVPTXISD::Tex2DArrayI32FloatLevel:
    204   case NVPTXISD::Tex2DArrayI32FloatGrad:
    205   case NVPTXISD::Tex3DFloatI32:
    206   case NVPTXISD::Tex3DFloatFloat:
    207   case NVPTXISD::Tex3DFloatFloatLevel:
    208   case NVPTXISD::Tex3DFloatFloatGrad:
    209   case NVPTXISD::Tex3DI32I32:
    210   case NVPTXISD::Tex3DI32Float:
    211   case NVPTXISD::Tex3DI32FloatLevel:
    212   case NVPTXISD::Tex3DI32FloatGrad:
    213     ResNode = SelectTextureIntrinsic(N);
    214     break;
    215   case NVPTXISD::Suld1DI8Trap:
    216   case NVPTXISD::Suld1DI16Trap:
    217   case NVPTXISD::Suld1DI32Trap:
    218   case NVPTXISD::Suld1DV2I8Trap:
    219   case NVPTXISD::Suld1DV2I16Trap:
    220   case NVPTXISD::Suld1DV2I32Trap:
    221   case NVPTXISD::Suld1DV4I8Trap:
    222   case NVPTXISD::Suld1DV4I16Trap:
    223   case NVPTXISD::Suld1DV4I32Trap:
    224   case NVPTXISD::Suld1DArrayI8Trap:
    225   case NVPTXISD::Suld1DArrayI16Trap:
    226   case NVPTXISD::Suld1DArrayI32Trap:
    227   case NVPTXISD::Suld1DArrayV2I8Trap:
    228   case NVPTXISD::Suld1DArrayV2I16Trap:
    229   case NVPTXISD::Suld1DArrayV2I32Trap:
    230   case NVPTXISD::Suld1DArrayV4I8Trap:
    231   case NVPTXISD::Suld1DArrayV4I16Trap:
    232   case NVPTXISD::Suld1DArrayV4I32Trap:
    233   case NVPTXISD::Suld2DI8Trap:
    234   case NVPTXISD::Suld2DI16Trap:
    235   case NVPTXISD::Suld2DI32Trap:
    236   case NVPTXISD::Suld2DV2I8Trap:
    237   case NVPTXISD::Suld2DV2I16Trap:
    238   case NVPTXISD::Suld2DV2I32Trap:
    239   case NVPTXISD::Suld2DV4I8Trap:
    240   case NVPTXISD::Suld2DV4I16Trap:
    241   case NVPTXISD::Suld2DV4I32Trap:
    242   case NVPTXISD::Suld2DArrayI8Trap:
    243   case NVPTXISD::Suld2DArrayI16Trap:
    244   case NVPTXISD::Suld2DArrayI32Trap:
    245   case NVPTXISD::Suld2DArrayV2I8Trap:
    246   case NVPTXISD::Suld2DArrayV2I16Trap:
    247   case NVPTXISD::Suld2DArrayV2I32Trap:
    248   case NVPTXISD::Suld2DArrayV4I8Trap:
    249   case NVPTXISD::Suld2DArrayV4I16Trap:
    250   case NVPTXISD::Suld2DArrayV4I32Trap:
    251   case NVPTXISD::Suld3DI8Trap:
    252   case NVPTXISD::Suld3DI16Trap:
    253   case NVPTXISD::Suld3DI32Trap:
    254   case NVPTXISD::Suld3DV2I8Trap:
    255   case NVPTXISD::Suld3DV2I16Trap:
    256   case NVPTXISD::Suld3DV2I32Trap:
    257   case NVPTXISD::Suld3DV4I8Trap:
    258   case NVPTXISD::Suld3DV4I16Trap:
    259   case NVPTXISD::Suld3DV4I32Trap:
    260     ResNode = SelectSurfaceIntrinsic(N);
    261     break;
    262   case ISD::AND:
    263   case ISD::SRA:
    264   case ISD::SRL:
    265     // Try to select BFE
    266     ResNode = SelectBFE(N);
    267     break;
    268   case ISD::ADDRSPACECAST:
    269     ResNode = SelectAddrSpaceCast(N);
    270     break;
    271   default:
    272     break;
    273   }
    274   if (ResNode)
    275     return ResNode;
    276   return SelectCode(N);
    277 }
    278 
    279 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
    280   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    281   switch (IID) {
    282   default:
    283     return NULL;
    284   case Intrinsic::nvvm_ldg_global_f:
    285   case Intrinsic::nvvm_ldg_global_i:
    286   case Intrinsic::nvvm_ldg_global_p:
    287   case Intrinsic::nvvm_ldu_global_f:
    288   case Intrinsic::nvvm_ldu_global_i:
    289   case Intrinsic::nvvm_ldu_global_p:
    290     return SelectLDGLDU(N);
    291   }
    292 }
    293 
    294 static unsigned int getCodeAddrSpace(MemSDNode *N,
    295                                      const NVPTXSubtarget &Subtarget) {
    296   const Value *Src = N->getMemOperand()->getValue();
    297 
    298   if (!Src)
    299     return NVPTX::PTXLdStInstCode::GENERIC;
    300 
    301   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    302     switch (PT->getAddressSpace()) {
    303     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    304     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    305     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    306     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    307     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    308     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    309     default: break;
    310     }
    311   }
    312   return NVPTX::PTXLdStInstCode::GENERIC;
    313 }
    314 
    315 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
    316   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    317   switch (IID) {
    318   default:
    319     return nullptr;
    320   case Intrinsic::nvvm_texsurf_handle_internal:
    321     return SelectTexSurfHandle(N);
    322   }
    323 }
    324 
    325 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
    326   // Op 0 is the intrinsic ID
    327   SDValue Wrapper = N->getOperand(1);
    328   SDValue GlobalVal = Wrapper.getOperand(0);
    329   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
    330                                 GlobalVal);
    331 }
    332 
    333 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
    334   SDValue Src = N->getOperand(0);
    335   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
    336   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
    337   unsigned DstAddrSpace = CastN->getDestAddressSpace();
    338 
    339   assert(SrcAddrSpace != DstAddrSpace &&
    340          "addrspacecast must be between different address spaces");
    341 
    342   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    343     // Specific to generic
    344     unsigned Opc;
    345     switch (SrcAddrSpace) {
    346     default: report_fatal_error("Bad address space in addrspacecast");
    347     case ADDRESS_SPACE_GLOBAL:
    348       Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
    349                                 : NVPTX::cvta_global_yes;
    350       break;
    351     case ADDRESS_SPACE_SHARED:
    352       Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
    353                                 : NVPTX::cvta_shared_yes;
    354       break;
    355     case ADDRESS_SPACE_CONST:
    356       Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
    357                                 : NVPTX::cvta_const_yes;
    358       break;
    359     case ADDRESS_SPACE_LOCAL:
    360       Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
    361                                 : NVPTX::cvta_local_yes;
    362       break;
    363     }
    364     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    365   } else {
    366     // Generic to specific
    367     if (SrcAddrSpace != 0)
    368       report_fatal_error("Cannot cast between two non-generic address spaces");
    369     unsigned Opc;
    370     switch (DstAddrSpace) {
    371     default: report_fatal_error("Bad address space in addrspacecast");
    372     case ADDRESS_SPACE_GLOBAL:
    373       Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
    374                                 : NVPTX::cvta_to_global_yes;
    375       break;
    376     case ADDRESS_SPACE_SHARED:
    377       Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
    378                                 : NVPTX::cvta_to_shared_yes;
    379       break;
    380     case ADDRESS_SPACE_CONST:
    381       Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
    382                                 : NVPTX::cvta_to_const_yes;
    383       break;
    384     case ADDRESS_SPACE_LOCAL:
    385       Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
    386                                 : NVPTX::cvta_to_local_yes;
    387       break;
    388     }
    389     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    390   }
    391 }
    392 
    393 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    394   SDLoc dl(N);
    395   LoadSDNode *LD = cast<LoadSDNode>(N);
    396   EVT LoadedVT = LD->getMemoryVT();
    397   SDNode *NVPTXLD = nullptr;
    398 
    399   // do not support pre/post inc/dec
    400   if (LD->isIndexed())
    401     return nullptr;
    402 
    403   if (!LoadedVT.isSimple())
    404     return nullptr;
    405 
    406   // Address Space Setting
    407   unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
    408 
    409   // Volatile Setting
    410   // - .volatile is only availalble for .global and .shared
    411   bool isVolatile = LD->isVolatile();
    412   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    413       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    414       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    415     isVolatile = false;
    416 
    417   // Vector Setting
    418   MVT SimpleVT = LoadedVT.getSimpleVT();
    419   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    420   if (SimpleVT.isVector()) {
    421     unsigned num = SimpleVT.getVectorNumElements();
    422     if (num == 2)
    423       vecType = NVPTX::PTXLdStInstCode::V2;
    424     else if (num == 4)
    425       vecType = NVPTX::PTXLdStInstCode::V4;
    426     else
    427       return nullptr;
    428   }
    429 
    430   // Type Setting: fromType + fromTypeWidth
    431   //
    432   // Sign   : ISD::SEXTLOAD
    433   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    434   //          type is integer
    435   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    436   MVT ScalarVT = SimpleVT.getScalarType();
    437   // Read at least 8 bits (predicates are stored as 8-bit values)
    438   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    439   unsigned int fromType;
    440   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    441     fromType = NVPTX::PTXLdStInstCode::Signed;
    442   else if (ScalarVT.isFloatingPoint())
    443     fromType = NVPTX::PTXLdStInstCode::Float;
    444   else
    445     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    446 
    447   // Create the machine instruction DAG
    448   SDValue Chain = N->getOperand(0);
    449   SDValue N1 = N->getOperand(1);
    450   SDValue Addr;
    451   SDValue Offset, Base;
    452   unsigned Opcode;
    453   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
    454 
    455   if (SelectDirectAddr(N1, Addr)) {
    456     switch (TargetVT) {
    457     case MVT::i8:
    458       Opcode = NVPTX::LD_i8_avar;
    459       break;
    460     case MVT::i16:
    461       Opcode = NVPTX::LD_i16_avar;
    462       break;
    463     case MVT::i32:
    464       Opcode = NVPTX::LD_i32_avar;
    465       break;
    466     case MVT::i64:
    467       Opcode = NVPTX::LD_i64_avar;
    468       break;
    469     case MVT::f32:
    470       Opcode = NVPTX::LD_f32_avar;
    471       break;
    472     case MVT::f64:
    473       Opcode = NVPTX::LD_f64_avar;
    474       break;
    475     default:
    476       return nullptr;
    477     }
    478     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    479                       getI32Imm(vecType), getI32Imm(fromType),
    480                       getI32Imm(fromTypeWidth), Addr, Chain };
    481     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    482   } else if (Subtarget.is64Bit()
    483                  ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
    484                  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    485     switch (TargetVT) {
    486     case MVT::i8:
    487       Opcode = NVPTX::LD_i8_asi;
    488       break;
    489     case MVT::i16:
    490       Opcode = NVPTX::LD_i16_asi;
    491       break;
    492     case MVT::i32:
    493       Opcode = NVPTX::LD_i32_asi;
    494       break;
    495     case MVT::i64:
    496       Opcode = NVPTX::LD_i64_asi;
    497       break;
    498     case MVT::f32:
    499       Opcode = NVPTX::LD_f32_asi;
    500       break;
    501     case MVT::f64:
    502       Opcode = NVPTX::LD_f64_asi;
    503       break;
    504     default:
    505       return nullptr;
    506     }
    507     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    508                       getI32Imm(vecType), getI32Imm(fromType),
    509                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
    510     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    511   } else if (Subtarget.is64Bit()
    512                  ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
    513                  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    514     if (Subtarget.is64Bit()) {
    515       switch (TargetVT) {
    516       case MVT::i8:
    517         Opcode = NVPTX::LD_i8_ari_64;
    518         break;
    519       case MVT::i16:
    520         Opcode = NVPTX::LD_i16_ari_64;
    521         break;
    522       case MVT::i32:
    523         Opcode = NVPTX::LD_i32_ari_64;
    524         break;
    525       case MVT::i64:
    526         Opcode = NVPTX::LD_i64_ari_64;
    527         break;
    528       case MVT::f32:
    529         Opcode = NVPTX::LD_f32_ari_64;
    530         break;
    531       case MVT::f64:
    532         Opcode = NVPTX::LD_f64_ari_64;
    533         break;
    534       default:
    535         return nullptr;
    536       }
    537     } else {
    538       switch (TargetVT) {
    539       case MVT::i8:
    540         Opcode = NVPTX::LD_i8_ari;
    541         break;
    542       case MVT::i16:
    543         Opcode = NVPTX::LD_i16_ari;
    544         break;
    545       case MVT::i32:
    546         Opcode = NVPTX::LD_i32_ari;
    547         break;
    548       case MVT::i64:
    549         Opcode = NVPTX::LD_i64_ari;
    550         break;
    551       case MVT::f32:
    552         Opcode = NVPTX::LD_f32_ari;
    553         break;
    554       case MVT::f64:
    555         Opcode = NVPTX::LD_f64_ari;
    556         break;
    557       default:
    558         return nullptr;
    559       }
    560     }
    561     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    562                       getI32Imm(vecType), getI32Imm(fromType),
    563                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
    564     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    565   } else {
    566     if (Subtarget.is64Bit()) {
    567       switch (TargetVT) {
    568       case MVT::i8:
    569         Opcode = NVPTX::LD_i8_areg_64;
    570         break;
    571       case MVT::i16:
    572         Opcode = NVPTX::LD_i16_areg_64;
    573         break;
    574       case MVT::i32:
    575         Opcode = NVPTX::LD_i32_areg_64;
    576         break;
    577       case MVT::i64:
    578         Opcode = NVPTX::LD_i64_areg_64;
    579         break;
    580       case MVT::f32:
    581         Opcode = NVPTX::LD_f32_areg_64;
    582         break;
    583       case MVT::f64:
    584         Opcode = NVPTX::LD_f64_areg_64;
    585         break;
    586       default:
    587         return nullptr;
    588       }
    589     } else {
    590       switch (TargetVT) {
    591       case MVT::i8:
    592         Opcode = NVPTX::LD_i8_areg;
    593         break;
    594       case MVT::i16:
    595         Opcode = NVPTX::LD_i16_areg;
    596         break;
    597       case MVT::i32:
    598         Opcode = NVPTX::LD_i32_areg;
    599         break;
    600       case MVT::i64:
    601         Opcode = NVPTX::LD_i64_areg;
    602         break;
    603       case MVT::f32:
    604         Opcode = NVPTX::LD_f32_areg;
    605         break;
    606       case MVT::f64:
    607         Opcode = NVPTX::LD_f64_areg;
    608         break;
    609       default:
    610         return nullptr;
    611       }
    612     }
    613     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    614                       getI32Imm(vecType), getI32Imm(fromType),
    615                       getI32Imm(fromTypeWidth), N1, Chain };
    616     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    617   }
    618 
    619   if (NVPTXLD) {
    620     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    621     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    622     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    623   }
    624 
    625   return NVPTXLD;
    626 }
    627 
    628 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
    629 
    630   SDValue Chain = N->getOperand(0);
    631   SDValue Op1 = N->getOperand(1);
    632   SDValue Addr, Offset, Base;
    633   unsigned Opcode;
    634   SDLoc DL(N);
    635   SDNode *LD;
    636   MemSDNode *MemSD = cast<MemSDNode>(N);
    637   EVT LoadedVT = MemSD->getMemoryVT();
    638 
    639   if (!LoadedVT.isSimple())
    640     return nullptr;
    641 
    642   // Address Space Setting
    643   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
    644 
    645   // Volatile Setting
    646   // - .volatile is only availalble for .global and .shared
    647   bool IsVolatile = MemSD->isVolatile();
    648   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    649       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    650       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    651     IsVolatile = false;
    652 
    653   // Vector Setting
    654   MVT SimpleVT = LoadedVT.getSimpleVT();
    655 
    656   // Type Setting: fromType + fromTypeWidth
    657   //
    658   // Sign   : ISD::SEXTLOAD
    659   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    660   //          type is integer
    661   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    662   MVT ScalarVT = SimpleVT.getScalarType();
    663   // Read at least 8 bits (predicates are stored as 8-bit values)
    664   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    665   unsigned int FromType;
    666   // The last operand holds the original LoadSDNode::getExtensionType() value
    667   unsigned ExtensionType = cast<ConstantSDNode>(
    668       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
    669   if (ExtensionType == ISD::SEXTLOAD)
    670     FromType = NVPTX::PTXLdStInstCode::Signed;
    671   else if (ScalarVT.isFloatingPoint())
    672     FromType = NVPTX::PTXLdStInstCode::Float;
    673   else
    674     FromType = NVPTX::PTXLdStInstCode::Unsigned;
    675 
    676   unsigned VecType;
    677 
    678   switch (N->getOpcode()) {
    679   case NVPTXISD::LoadV2:
    680     VecType = NVPTX::PTXLdStInstCode::V2;
    681     break;
    682   case NVPTXISD::LoadV4:
    683     VecType = NVPTX::PTXLdStInstCode::V4;
    684     break;
    685   default:
    686     return nullptr;
    687   }
    688 
    689   EVT EltVT = N->getValueType(0);
    690 
    691   if (SelectDirectAddr(Op1, Addr)) {
    692     switch (N->getOpcode()) {
    693     default:
    694       return nullptr;
    695     case NVPTXISD::LoadV2:
    696       switch (EltVT.getSimpleVT().SimpleTy) {
    697       default:
    698         return nullptr;
    699       case MVT::i8:
    700         Opcode = NVPTX::LDV_i8_v2_avar;
    701         break;
    702       case MVT::i16:
    703         Opcode = NVPTX::LDV_i16_v2_avar;
    704         break;
    705       case MVT::i32:
    706         Opcode = NVPTX::LDV_i32_v2_avar;
    707         break;
    708       case MVT::i64:
    709         Opcode = NVPTX::LDV_i64_v2_avar;
    710         break;
    711       case MVT::f32:
    712         Opcode = NVPTX::LDV_f32_v2_avar;
    713         break;
    714       case MVT::f64:
    715         Opcode = NVPTX::LDV_f64_v2_avar;
    716         break;
    717       }
    718       break;
    719     case NVPTXISD::LoadV4:
    720       switch (EltVT.getSimpleVT().SimpleTy) {
    721       default:
    722         return nullptr;
    723       case MVT::i8:
    724         Opcode = NVPTX::LDV_i8_v4_avar;
    725         break;
    726       case MVT::i16:
    727         Opcode = NVPTX::LDV_i16_v4_avar;
    728         break;
    729       case MVT::i32:
    730         Opcode = NVPTX::LDV_i32_v4_avar;
    731         break;
    732       case MVT::f32:
    733         Opcode = NVPTX::LDV_f32_v4_avar;
    734         break;
    735       }
    736       break;
    737     }
    738 
    739     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
    740                       getI32Imm(VecType), getI32Imm(FromType),
    741                       getI32Imm(FromTypeWidth), Addr, Chain };
    742     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    743   } else if (Subtarget.is64Bit()
    744                  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
    745                  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    746     switch (N->getOpcode()) {
    747     default:
    748       return nullptr;
    749     case NVPTXISD::LoadV2:
    750       switch (EltVT.getSimpleVT().SimpleTy) {
    751       default:
    752         return nullptr;
    753       case MVT::i8:
    754         Opcode = NVPTX::LDV_i8_v2_asi;
    755         break;
    756       case MVT::i16:
    757         Opcode = NVPTX::LDV_i16_v2_asi;
    758         break;
    759       case MVT::i32:
    760         Opcode = NVPTX::LDV_i32_v2_asi;
    761         break;
    762       case MVT::i64:
    763         Opcode = NVPTX::LDV_i64_v2_asi;
    764         break;
    765       case MVT::f32:
    766         Opcode = NVPTX::LDV_f32_v2_asi;
    767         break;
    768       case MVT::f64:
    769         Opcode = NVPTX::LDV_f64_v2_asi;
    770         break;
    771       }
    772       break;
    773     case NVPTXISD::LoadV4:
    774       switch (EltVT.getSimpleVT().SimpleTy) {
    775       default:
    776         return nullptr;
    777       case MVT::i8:
    778         Opcode = NVPTX::LDV_i8_v4_asi;
    779         break;
    780       case MVT::i16:
    781         Opcode = NVPTX::LDV_i16_v4_asi;
    782         break;
    783       case MVT::i32:
    784         Opcode = NVPTX::LDV_i32_v4_asi;
    785         break;
    786       case MVT::f32:
    787         Opcode = NVPTX::LDV_f32_v4_asi;
    788         break;
    789       }
    790       break;
    791     }
    792 
    793     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
    794                       getI32Imm(VecType), getI32Imm(FromType),
    795                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
    796     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    797   } else if (Subtarget.is64Bit()
    798                  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
    799                  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    800     if (Subtarget.is64Bit()) {
    801       switch (N->getOpcode()) {
    802       default:
    803         return nullptr;
    804       case NVPTXISD::LoadV2:
    805         switch (EltVT.getSimpleVT().SimpleTy) {
    806         default:
    807           return nullptr;
    808         case MVT::i8:
    809           Opcode = NVPTX::LDV_i8_v2_ari_64;
    810           break;
    811         case MVT::i16:
    812           Opcode = NVPTX::LDV_i16_v2_ari_64;
    813           break;
    814         case MVT::i32:
    815           Opcode = NVPTX::LDV_i32_v2_ari_64;
    816           break;
    817         case MVT::i64:
    818           Opcode = NVPTX::LDV_i64_v2_ari_64;
    819           break;
    820         case MVT::f32:
    821           Opcode = NVPTX::LDV_f32_v2_ari_64;
    822           break;
    823         case MVT::f64:
    824           Opcode = NVPTX::LDV_f64_v2_ari_64;
    825           break;
    826         }
    827         break;
    828       case NVPTXISD::LoadV4:
    829         switch (EltVT.getSimpleVT().SimpleTy) {
    830         default:
    831           return nullptr;
    832         case MVT::i8:
    833           Opcode = NVPTX::LDV_i8_v4_ari_64;
    834           break;
    835         case MVT::i16:
    836           Opcode = NVPTX::LDV_i16_v4_ari_64;
    837           break;
    838         case MVT::i32:
    839           Opcode = NVPTX::LDV_i32_v4_ari_64;
    840           break;
    841         case MVT::f32:
    842           Opcode = NVPTX::LDV_f32_v4_ari_64;
    843           break;
    844         }
    845         break;
    846       }
    847     } else {
    848       switch (N->getOpcode()) {
    849       default:
    850         return nullptr;
    851       case NVPTXISD::LoadV2:
    852         switch (EltVT.getSimpleVT().SimpleTy) {
    853         default:
    854           return nullptr;
    855         case MVT::i8:
    856           Opcode = NVPTX::LDV_i8_v2_ari;
    857           break;
    858         case MVT::i16:
    859           Opcode = NVPTX::LDV_i16_v2_ari;
    860           break;
    861         case MVT::i32:
    862           Opcode = NVPTX::LDV_i32_v2_ari;
    863           break;
    864         case MVT::i64:
    865           Opcode = NVPTX::LDV_i64_v2_ari;
    866           break;
    867         case MVT::f32:
    868           Opcode = NVPTX::LDV_f32_v2_ari;
    869           break;
    870         case MVT::f64:
    871           Opcode = NVPTX::LDV_f64_v2_ari;
    872           break;
    873         }
    874         break;
    875       case NVPTXISD::LoadV4:
    876         switch (EltVT.getSimpleVT().SimpleTy) {
    877         default:
    878           return nullptr;
    879         case MVT::i8:
    880           Opcode = NVPTX::LDV_i8_v4_ari;
    881           break;
    882         case MVT::i16:
    883           Opcode = NVPTX::LDV_i16_v4_ari;
    884           break;
    885         case MVT::i32:
    886           Opcode = NVPTX::LDV_i32_v4_ari;
    887           break;
    888         case MVT::f32:
    889           Opcode = NVPTX::LDV_f32_v4_ari;
    890           break;
    891         }
    892         break;
    893       }
    894     }
    895 
    896     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
    897                       getI32Imm(VecType), getI32Imm(FromType),
    898                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
    899 
    900     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    901   } else {
    902     if (Subtarget.is64Bit()) {
    903       switch (N->getOpcode()) {
    904       default:
    905         return nullptr;
    906       case NVPTXISD::LoadV2:
    907         switch (EltVT.getSimpleVT().SimpleTy) {
    908         default:
    909           return nullptr;
    910         case MVT::i8:
    911           Opcode = NVPTX::LDV_i8_v2_areg_64;
    912           break;
    913         case MVT::i16:
    914           Opcode = NVPTX::LDV_i16_v2_areg_64;
    915           break;
    916         case MVT::i32:
    917           Opcode = NVPTX::LDV_i32_v2_areg_64;
    918           break;
    919         case MVT::i64:
    920           Opcode = NVPTX::LDV_i64_v2_areg_64;
    921           break;
    922         case MVT::f32:
    923           Opcode = NVPTX::LDV_f32_v2_areg_64;
    924           break;
    925         case MVT::f64:
    926           Opcode = NVPTX::LDV_f64_v2_areg_64;
    927           break;
    928         }
    929         break;
    930       case NVPTXISD::LoadV4:
    931         switch (EltVT.getSimpleVT().SimpleTy) {
    932         default:
    933           return nullptr;
    934         case MVT::i8:
    935           Opcode = NVPTX::LDV_i8_v4_areg_64;
    936           break;
    937         case MVT::i16:
    938           Opcode = NVPTX::LDV_i16_v4_areg_64;
    939           break;
    940         case MVT::i32:
    941           Opcode = NVPTX::LDV_i32_v4_areg_64;
    942           break;
    943         case MVT::f32:
    944           Opcode = NVPTX::LDV_f32_v4_areg_64;
    945           break;
    946         }
    947         break;
    948       }
    949     } else {
    950       switch (N->getOpcode()) {
    951       default:
    952         return nullptr;
    953       case NVPTXISD::LoadV2:
    954         switch (EltVT.getSimpleVT().SimpleTy) {
    955         default:
    956           return nullptr;
    957         case MVT::i8:
    958           Opcode = NVPTX::LDV_i8_v2_areg;
    959           break;
    960         case MVT::i16:
    961           Opcode = NVPTX::LDV_i16_v2_areg;
    962           break;
    963         case MVT::i32:
    964           Opcode = NVPTX::LDV_i32_v2_areg;
    965           break;
    966         case MVT::i64:
    967           Opcode = NVPTX::LDV_i64_v2_areg;
    968           break;
    969         case MVT::f32:
    970           Opcode = NVPTX::LDV_f32_v2_areg;
    971           break;
    972         case MVT::f64:
    973           Opcode = NVPTX::LDV_f64_v2_areg;
    974           break;
    975         }
    976         break;
    977       case NVPTXISD::LoadV4:
    978         switch (EltVT.getSimpleVT().SimpleTy) {
    979         default:
    980           return nullptr;
    981         case MVT::i8:
    982           Opcode = NVPTX::LDV_i8_v4_areg;
    983           break;
    984         case MVT::i16:
    985           Opcode = NVPTX::LDV_i16_v4_areg;
    986           break;
    987         case MVT::i32:
    988           Opcode = NVPTX::LDV_i32_v4_areg;
    989           break;
    990         case MVT::f32:
    991           Opcode = NVPTX::LDV_f32_v4_areg;
    992           break;
    993         }
    994         break;
    995       }
    996     }
    997 
    998     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
    999                       getI32Imm(VecType), getI32Imm(FromType),
   1000                       getI32Imm(FromTypeWidth), Op1, Chain };
   1001     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1002   }
   1003 
   1004   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1005   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1006   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1007 
   1008   return LD;
   1009 }
   1010 
   1011 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
   1012 
   1013   SDValue Chain = N->getOperand(0);
   1014   SDValue Op1;
   1015   MemSDNode *Mem;
   1016   bool IsLDG = true;
   1017 
   1018   // If this is an LDG/LDU intrinsic, the address is the third operand. If it's
   1019   // an LDG/LDU SD node (from custom vector handling), it's the second operand.
   1020   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
   1021     Op1 = N->getOperand(2);
   1022     Mem = cast<MemIntrinsicSDNode>(N);
   1023     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   1024     switch (IID) {
   1025     default:
   1026       return NULL;
   1027     case Intrinsic::nvvm_ldg_global_f:
   1028     case Intrinsic::nvvm_ldg_global_i:
   1029     case Intrinsic::nvvm_ldg_global_p:
   1030       IsLDG = true;
   1031       break;
   1032     case Intrinsic::nvvm_ldu_global_f:
   1033     case Intrinsic::nvvm_ldu_global_i:
   1034     case Intrinsic::nvvm_ldu_global_p:
   1035       IsLDG = false;
   1036       break;
   1037     }
   1038   } else {
   1039     Op1 = N->getOperand(1);
   1040     Mem = cast<MemSDNode>(N);
   1041   }
   1042 
   1043   unsigned Opcode;
   1044   SDLoc DL(N);
   1045   SDNode *LD;
   1046   SDValue Base, Offset, Addr;
   1047 
   1048   EVT EltVT = Mem->getMemoryVT();
   1049   if (EltVT.isVector()) {
   1050     EltVT = EltVT.getVectorElementType();
   1051   }
   1052 
   1053   if (SelectDirectAddr(Op1, Addr)) {
   1054     switch (N->getOpcode()) {
   1055     default:
   1056       return nullptr;
   1057     case ISD::INTRINSIC_W_CHAIN:
   1058       if (IsLDG) {
   1059         switch (EltVT.getSimpleVT().SimpleTy) {
   1060         default:
   1061           return nullptr;
   1062         case MVT::i8:
   1063           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
   1064           break;
   1065         case MVT::i16:
   1066           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
   1067           break;
   1068         case MVT::i32:
   1069           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
   1070           break;
   1071         case MVT::i64:
   1072           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
   1073           break;
   1074         case MVT::f32:
   1075           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
   1076           break;
   1077         case MVT::f64:
   1078           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
   1079           break;
   1080         }
   1081       } else {
   1082         switch (EltVT.getSimpleVT().SimpleTy) {
   1083         default:
   1084           return nullptr;
   1085         case MVT::i8:
   1086           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
   1087           break;
   1088         case MVT::i16:
   1089           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
   1090           break;
   1091         case MVT::i32:
   1092           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
   1093           break;
   1094         case MVT::i64:
   1095           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
   1096           break;
   1097         case MVT::f32:
   1098           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
   1099           break;
   1100         case MVT::f64:
   1101           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
   1102           break;
   1103         }
   1104       }
   1105       break;
   1106     case NVPTXISD::LDGV2:
   1107       switch (EltVT.getSimpleVT().SimpleTy) {
   1108       default:
   1109         return nullptr;
   1110       case MVT::i8:
   1111         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
   1112         break;
   1113       case MVT::i16:
   1114         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
   1115         break;
   1116       case MVT::i32:
   1117         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
   1118         break;
   1119       case MVT::i64:
   1120         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
   1121         break;
   1122       case MVT::f32:
   1123         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
   1124         break;
   1125       case MVT::f64:
   1126         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
   1127         break;
   1128       }
   1129       break;
   1130     case NVPTXISD::LDUV2:
   1131       switch (EltVT.getSimpleVT().SimpleTy) {
   1132       default:
   1133         return nullptr;
   1134       case MVT::i8:
   1135         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
   1136         break;
   1137       case MVT::i16:
   1138         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
   1139         break;
   1140       case MVT::i32:
   1141         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
   1142         break;
   1143       case MVT::i64:
   1144         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
   1145         break;
   1146       case MVT::f32:
   1147         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
   1148         break;
   1149       case MVT::f64:
   1150         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
   1151         break;
   1152       }
   1153       break;
   1154     case NVPTXISD::LDGV4:
   1155       switch (EltVT.getSimpleVT().SimpleTy) {
   1156       default:
   1157         return nullptr;
   1158       case MVT::i8:
   1159         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
   1160         break;
   1161       case MVT::i16:
   1162         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
   1163         break;
   1164       case MVT::i32:
   1165         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
   1166         break;
   1167       case MVT::f32:
   1168         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
   1169         break;
   1170       }
   1171       break;
   1172     case NVPTXISD::LDUV4:
   1173       switch (EltVT.getSimpleVT().SimpleTy) {
   1174       default:
   1175         return nullptr;
   1176       case MVT::i8:
   1177         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
   1178         break;
   1179       case MVT::i16:
   1180         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
   1181         break;
   1182       case MVT::i32:
   1183         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
   1184         break;
   1185       case MVT::f32:
   1186         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
   1187         break;
   1188       }
   1189       break;
   1190     }
   1191 
   1192     SDValue Ops[] = { Addr, Chain };
   1193     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1194   } else if (Subtarget.is64Bit()
   1195                  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1196                  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
   1197     if (Subtarget.is64Bit()) {
   1198       switch (N->getOpcode()) {
   1199       default:
   1200         return nullptr;
   1201       case ISD::INTRINSIC_W_CHAIN:
   1202         if (IsLDG) {
   1203           switch (EltVT.getSimpleVT().SimpleTy) {
   1204           default:
   1205             return nullptr;
   1206           case MVT::i8:
   1207             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
   1208             break;
   1209           case MVT::i16:
   1210             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
   1211             break;
   1212           case MVT::i32:
   1213             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
   1214             break;
   1215           case MVT::i64:
   1216             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
   1217             break;
   1218           case MVT::f32:
   1219             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
   1220             break;
   1221           case MVT::f64:
   1222             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
   1223             break;
   1224           }
   1225         } else {
   1226           switch (EltVT.getSimpleVT().SimpleTy) {
   1227           default:
   1228             return nullptr;
   1229           case MVT::i8:
   1230             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
   1231             break;
   1232           case MVT::i16:
   1233             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
   1234             break;
   1235           case MVT::i32:
   1236             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
   1237             break;
   1238           case MVT::i64:
   1239             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
   1240             break;
   1241           case MVT::f32:
   1242             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
   1243             break;
   1244           case MVT::f64:
   1245             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
   1246             break;
   1247           }
   1248         }
   1249         break;
   1250       case NVPTXISD::LDGV2:
   1251         switch (EltVT.getSimpleVT().SimpleTy) {
   1252         default:
   1253           return nullptr;
   1254         case MVT::i8:
   1255           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
   1256           break;
   1257         case MVT::i16:
   1258           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
   1259           break;
   1260         case MVT::i32:
   1261           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
   1262           break;
   1263         case MVT::i64:
   1264           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
   1265           break;
   1266         case MVT::f32:
   1267           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
   1268           break;
   1269         case MVT::f64:
   1270           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
   1271           break;
   1272         }
   1273         break;
   1274       case NVPTXISD::LDUV2:
   1275         switch (EltVT.getSimpleVT().SimpleTy) {
   1276         default:
   1277           return nullptr;
   1278         case MVT::i8:
   1279           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
   1280           break;
   1281         case MVT::i16:
   1282           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
   1283           break;
   1284         case MVT::i32:
   1285           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
   1286           break;
   1287         case MVT::i64:
   1288           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
   1289           break;
   1290         case MVT::f32:
   1291           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
   1292           break;
   1293         case MVT::f64:
   1294           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
   1295           break;
   1296         }
   1297         break;
   1298       case NVPTXISD::LDGV4:
   1299         switch (EltVT.getSimpleVT().SimpleTy) {
   1300         default:
   1301           return nullptr;
   1302         case MVT::i8:
   1303           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
   1304           break;
   1305         case MVT::i16:
   1306           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
   1307           break;
   1308         case MVT::i32:
   1309           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
   1310           break;
   1311         case MVT::f32:
   1312           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
   1313           break;
   1314         }
   1315         break;
   1316       case NVPTXISD::LDUV4:
   1317         switch (EltVT.getSimpleVT().SimpleTy) {
   1318         default:
   1319           return nullptr;
   1320         case MVT::i8:
   1321           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
   1322           break;
   1323         case MVT::i16:
   1324           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
   1325           break;
   1326         case MVT::i32:
   1327           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
   1328           break;
   1329         case MVT::f32:
   1330           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
   1331           break;
   1332         }
   1333         break;
   1334       }
   1335     } else {
   1336       switch (N->getOpcode()) {
   1337       default:
   1338         return nullptr;
   1339       case ISD::INTRINSIC_W_CHAIN:
   1340         if (IsLDG) {
   1341           switch (EltVT.getSimpleVT().SimpleTy) {
   1342           default:
   1343             return nullptr;
   1344           case MVT::i8:
   1345             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
   1346             break;
   1347           case MVT::i16:
   1348             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
   1349             break;
   1350           case MVT::i32:
   1351             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
   1352             break;
   1353           case MVT::i64:
   1354             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
   1355             break;
   1356           case MVT::f32:
   1357             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
   1358             break;
   1359           case MVT::f64:
   1360             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
   1361             break;
   1362           }
   1363         } else {
   1364           switch (EltVT.getSimpleVT().SimpleTy) {
   1365           default:
   1366             return nullptr;
   1367           case MVT::i8:
   1368             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
   1369             break;
   1370           case MVT::i16:
   1371             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
   1372             break;
   1373           case MVT::i32:
   1374             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
   1375             break;
   1376           case MVT::i64:
   1377             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
   1378             break;
   1379           case MVT::f32:
   1380             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
   1381             break;
   1382           case MVT::f64:
   1383             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
   1384             break;
   1385           }
   1386         }
   1387         break;
   1388       case NVPTXISD::LDGV2:
   1389         switch (EltVT.getSimpleVT().SimpleTy) {
   1390         default:
   1391           return nullptr;
   1392         case MVT::i8:
   1393           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
   1394           break;
   1395         case MVT::i16:
   1396           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
   1397           break;
   1398         case MVT::i32:
   1399           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
   1400           break;
   1401         case MVT::i64:
   1402           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
   1403           break;
   1404         case MVT::f32:
   1405           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
   1406           break;
   1407         case MVT::f64:
   1408           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
   1409           break;
   1410         }
   1411         break;
   1412       case NVPTXISD::LDUV2:
   1413         switch (EltVT.getSimpleVT().SimpleTy) {
   1414         default:
   1415           return nullptr;
   1416         case MVT::i8:
   1417           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
   1418           break;
   1419         case MVT::i16:
   1420           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
   1421           break;
   1422         case MVT::i32:
   1423           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
   1424           break;
   1425         case MVT::i64:
   1426           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
   1427           break;
   1428         case MVT::f32:
   1429           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
   1430           break;
   1431         case MVT::f64:
   1432           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
   1433           break;
   1434         }
   1435         break;
   1436       case NVPTXISD::LDGV4:
   1437         switch (EltVT.getSimpleVT().SimpleTy) {
   1438         default:
   1439           return nullptr;
   1440         case MVT::i8:
   1441           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
   1442           break;
   1443         case MVT::i16:
   1444           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
   1445           break;
   1446         case MVT::i32:
   1447           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
   1448           break;
   1449         case MVT::f32:
   1450           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
   1451           break;
   1452         }
   1453         break;
   1454       case NVPTXISD::LDUV4:
   1455         switch (EltVT.getSimpleVT().SimpleTy) {
   1456         default:
   1457           return nullptr;
   1458         case MVT::i8:
   1459           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
   1460           break;
   1461         case MVT::i16:
   1462           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
   1463           break;
   1464         case MVT::i32:
   1465           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
   1466           break;
   1467         case MVT::f32:
   1468           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
   1469           break;
   1470         }
   1471         break;
   1472       }
   1473     }
   1474 
   1475     SDValue Ops[] = { Base, Offset, Chain };
   1476 
   1477     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1478   } else {
   1479     if (Subtarget.is64Bit()) {
   1480       switch (N->getOpcode()) {
   1481       default:
   1482         return nullptr;
   1483       case ISD::INTRINSIC_W_CHAIN:
   1484         if (IsLDG) {
   1485           switch (EltVT.getSimpleVT().SimpleTy) {
   1486           default:
   1487             return nullptr;
   1488           case MVT::i8:
   1489             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
   1490             break;
   1491           case MVT::i16:
   1492             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
   1493             break;
   1494           case MVT::i32:
   1495             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
   1496             break;
   1497           case MVT::i64:
   1498             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
   1499             break;
   1500           case MVT::f32:
   1501             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
   1502             break;
   1503           case MVT::f64:
   1504             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
   1505             break;
   1506           }
   1507         } else {
   1508           switch (EltVT.getSimpleVT().SimpleTy) {
   1509           default:
   1510             return nullptr;
   1511           case MVT::i8:
   1512             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
   1513             break;
   1514           case MVT::i16:
   1515             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
   1516             break;
   1517           case MVT::i32:
   1518             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
   1519             break;
   1520           case MVT::i64:
   1521             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
   1522             break;
   1523           case MVT::f32:
   1524             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
   1525             break;
   1526           case MVT::f64:
   1527             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
   1528             break;
   1529           }
   1530         }
   1531         break;
   1532       case NVPTXISD::LDGV2:
   1533         switch (EltVT.getSimpleVT().SimpleTy) {
   1534         default:
   1535           return nullptr;
   1536         case MVT::i8:
   1537           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
   1538           break;
   1539         case MVT::i16:
   1540           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
   1541           break;
   1542         case MVT::i32:
   1543           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
   1544           break;
   1545         case MVT::i64:
   1546           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
   1547           break;
   1548         case MVT::f32:
   1549           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
   1550           break;
   1551         case MVT::f64:
   1552           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
   1553           break;
   1554         }
   1555         break;
   1556       case NVPTXISD::LDUV2:
   1557         switch (EltVT.getSimpleVT().SimpleTy) {
   1558         default:
   1559           return nullptr;
   1560         case MVT::i8:
   1561           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
   1562           break;
   1563         case MVT::i16:
   1564           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
   1565           break;
   1566         case MVT::i32:
   1567           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
   1568           break;
   1569         case MVT::i64:
   1570           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
   1571           break;
   1572         case MVT::f32:
   1573           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
   1574           break;
   1575         case MVT::f64:
   1576           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
   1577           break;
   1578         }
   1579         break;
   1580       case NVPTXISD::LDGV4:
   1581         switch (EltVT.getSimpleVT().SimpleTy) {
   1582         default:
   1583           return nullptr;
   1584         case MVT::i8:
   1585           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
   1586           break;
   1587         case MVT::i16:
   1588           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
   1589           break;
   1590         case MVT::i32:
   1591           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
   1592           break;
   1593         case MVT::f32:
   1594           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
   1595           break;
   1596         }
   1597         break;
   1598       case NVPTXISD::LDUV4:
   1599         switch (EltVT.getSimpleVT().SimpleTy) {
   1600         default:
   1601           return nullptr;
   1602         case MVT::i8:
   1603           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
   1604           break;
   1605         case MVT::i16:
   1606           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
   1607           break;
   1608         case MVT::i32:
   1609           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
   1610           break;
   1611         case MVT::f32:
   1612           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
   1613           break;
   1614         }
   1615         break;
   1616       }
   1617     } else {
   1618       switch (N->getOpcode()) {
   1619       default:
   1620         return nullptr;
   1621       case ISD::INTRINSIC_W_CHAIN:
   1622         if (IsLDG) {
   1623           switch (EltVT.getSimpleVT().SimpleTy) {
   1624           default:
   1625             return nullptr;
   1626           case MVT::i8:
   1627             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
   1628             break;
   1629           case MVT::i16:
   1630             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
   1631             break;
   1632           case MVT::i32:
   1633             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
   1634             break;
   1635           case MVT::i64:
   1636             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
   1637             break;
   1638           case MVT::f32:
   1639             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
   1640             break;
   1641           case MVT::f64:
   1642             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
   1643             break;
   1644           }
   1645         } else {
   1646           switch (EltVT.getSimpleVT().SimpleTy) {
   1647           default:
   1648             return nullptr;
   1649           case MVT::i8:
   1650             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
   1651             break;
   1652           case MVT::i16:
   1653             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
   1654             break;
   1655           case MVT::i32:
   1656             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
   1657             break;
   1658           case MVT::i64:
   1659             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
   1660             break;
   1661           case MVT::f32:
   1662             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
   1663             break;
   1664           case MVT::f64:
   1665             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
   1666             break;
   1667           }
   1668         }
   1669         break;
   1670       case NVPTXISD::LDGV2:
   1671         switch (EltVT.getSimpleVT().SimpleTy) {
   1672         default:
   1673           return nullptr;
   1674         case MVT::i8:
   1675           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
   1676           break;
   1677         case MVT::i16:
   1678           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
   1679           break;
   1680         case MVT::i32:
   1681           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
   1682           break;
   1683         case MVT::i64:
   1684           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
   1685           break;
   1686         case MVT::f32:
   1687           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
   1688           break;
   1689         case MVT::f64:
   1690           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
   1691           break;
   1692         }
   1693         break;
   1694       case NVPTXISD::LDUV2:
   1695         switch (EltVT.getSimpleVT().SimpleTy) {
   1696         default:
   1697           return nullptr;
   1698         case MVT::i8:
   1699           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
   1700           break;
   1701         case MVT::i16:
   1702           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
   1703           break;
   1704         case MVT::i32:
   1705           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
   1706           break;
   1707         case MVT::i64:
   1708           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
   1709           break;
   1710         case MVT::f32:
   1711           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
   1712           break;
   1713         case MVT::f64:
   1714           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
   1715           break;
   1716         }
   1717         break;
   1718       case NVPTXISD::LDGV4:
   1719         switch (EltVT.getSimpleVT().SimpleTy) {
   1720         default:
   1721           return nullptr;
   1722         case MVT::i8:
   1723           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
   1724           break;
   1725         case MVT::i16:
   1726           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
   1727           break;
   1728         case MVT::i32:
   1729           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
   1730           break;
   1731         case MVT::f32:
   1732           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
   1733           break;
   1734         }
   1735         break;
   1736       case NVPTXISD::LDUV4:
   1737         switch (EltVT.getSimpleVT().SimpleTy) {
   1738         default:
   1739           return nullptr;
   1740         case MVT::i8:
   1741           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
   1742           break;
   1743         case MVT::i16:
   1744           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
   1745           break;
   1746         case MVT::i32:
   1747           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
   1748           break;
   1749         case MVT::f32:
   1750           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
   1751           break;
   1752         }
   1753         break;
   1754       }
   1755     }
   1756 
   1757     SDValue Ops[] = { Op1, Chain };
   1758     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1759   }
   1760 
   1761   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1762   MemRefs0[0] = Mem->getMemOperand();
   1763   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1764 
   1765   return LD;
   1766 }
   1767 
   1768 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   1769   SDLoc dl(N);
   1770   StoreSDNode *ST = cast<StoreSDNode>(N);
   1771   EVT StoreVT = ST->getMemoryVT();
   1772   SDNode *NVPTXST = nullptr;
   1773 
   1774   // do not support pre/post inc/dec
   1775   if (ST->isIndexed())
   1776     return nullptr;
   1777 
   1778   if (!StoreVT.isSimple())
   1779     return nullptr;
   1780 
   1781   // Address Space Setting
   1782   unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
   1783 
   1784   // Volatile Setting
   1785   // - .volatile is only availalble for .global and .shared
   1786   bool isVolatile = ST->isVolatile();
   1787   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   1788       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   1789       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   1790     isVolatile = false;
   1791 
   1792   // Vector Setting
   1793   MVT SimpleVT = StoreVT.getSimpleVT();
   1794   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
   1795   if (SimpleVT.isVector()) {
   1796     unsigned num = SimpleVT.getVectorNumElements();
   1797     if (num == 2)
   1798       vecType = NVPTX::PTXLdStInstCode::V2;
   1799     else if (num == 4)
   1800       vecType = NVPTX::PTXLdStInstCode::V4;
   1801     else
   1802       return nullptr;
   1803   }
   1804 
   1805   // Type Setting: toType + toTypeWidth
   1806   // - for integer type, always use 'u'
   1807   //
   1808   MVT ScalarVT = SimpleVT.getScalarType();
   1809   unsigned toTypeWidth = ScalarVT.getSizeInBits();
   1810   unsigned int toType;
   1811   if (ScalarVT.isFloatingPoint())
   1812     toType = NVPTX::PTXLdStInstCode::Float;
   1813   else
   1814     toType = NVPTX::PTXLdStInstCode::Unsigned;
   1815 
   1816   // Create the machine instruction DAG
   1817   SDValue Chain = N->getOperand(0);
   1818   SDValue N1 = N->getOperand(1);
   1819   SDValue N2 = N->getOperand(2);
   1820   SDValue Addr;
   1821   SDValue Offset, Base;
   1822   unsigned Opcode;
   1823   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
   1824 
   1825   if (SelectDirectAddr(N2, Addr)) {
   1826     switch (SourceVT) {
   1827     case MVT::i8:
   1828       Opcode = NVPTX::ST_i8_avar;
   1829       break;
   1830     case MVT::i16:
   1831       Opcode = NVPTX::ST_i16_avar;
   1832       break;
   1833     case MVT::i32:
   1834       Opcode = NVPTX::ST_i32_avar;
   1835       break;
   1836     case MVT::i64:
   1837       Opcode = NVPTX::ST_i64_avar;
   1838       break;
   1839     case MVT::f32:
   1840       Opcode = NVPTX::ST_f32_avar;
   1841       break;
   1842     case MVT::f64:
   1843       Opcode = NVPTX::ST_f64_avar;
   1844       break;
   1845     default:
   1846       return nullptr;
   1847     }
   1848     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   1849                       getI32Imm(vecType), getI32Imm(toType),
   1850                       getI32Imm(toTypeWidth), Addr, Chain };
   1851     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   1852   } else if (Subtarget.is64Bit()
   1853                  ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   1854                  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   1855     switch (SourceVT) {
   1856     case MVT::i8:
   1857       Opcode = NVPTX::ST_i8_asi;
   1858       break;
   1859     case MVT::i16:
   1860       Opcode = NVPTX::ST_i16_asi;
   1861       break;
   1862     case MVT::i32:
   1863       Opcode = NVPTX::ST_i32_asi;
   1864       break;
   1865     case MVT::i64:
   1866       Opcode = NVPTX::ST_i64_asi;
   1867       break;
   1868     case MVT::f32:
   1869       Opcode = NVPTX::ST_f32_asi;
   1870       break;
   1871     case MVT::f64:
   1872       Opcode = NVPTX::ST_f64_asi;
   1873       break;
   1874     default:
   1875       return nullptr;
   1876     }
   1877     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   1878                       getI32Imm(vecType), getI32Imm(toType),
   1879                       getI32Imm(toTypeWidth), Base, Offset, Chain };
   1880     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   1881   } else if (Subtarget.is64Bit()
   1882                  ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   1883                  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   1884     if (Subtarget.is64Bit()) {
   1885       switch (SourceVT) {
   1886       case MVT::i8:
   1887         Opcode = NVPTX::ST_i8_ari_64;
   1888         break;
   1889       case MVT::i16:
   1890         Opcode = NVPTX::ST_i16_ari_64;
   1891         break;
   1892       case MVT::i32:
   1893         Opcode = NVPTX::ST_i32_ari_64;
   1894         break;
   1895       case MVT::i64:
   1896         Opcode = NVPTX::ST_i64_ari_64;
   1897         break;
   1898       case MVT::f32:
   1899         Opcode = NVPTX::ST_f32_ari_64;
   1900         break;
   1901       case MVT::f64:
   1902         Opcode = NVPTX::ST_f64_ari_64;
   1903         break;
   1904       default:
   1905         return nullptr;
   1906       }
   1907     } else {
   1908       switch (SourceVT) {
   1909       case MVT::i8:
   1910         Opcode = NVPTX::ST_i8_ari;
   1911         break;
   1912       case MVT::i16:
   1913         Opcode = NVPTX::ST_i16_ari;
   1914         break;
   1915       case MVT::i32:
   1916         Opcode = NVPTX::ST_i32_ari;
   1917         break;
   1918       case MVT::i64:
   1919         Opcode = NVPTX::ST_i64_ari;
   1920         break;
   1921       case MVT::f32:
   1922         Opcode = NVPTX::ST_f32_ari;
   1923         break;
   1924       case MVT::f64:
   1925         Opcode = NVPTX::ST_f64_ari;
   1926         break;
   1927       default:
   1928         return nullptr;
   1929       }
   1930     }
   1931     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   1932                       getI32Imm(vecType), getI32Imm(toType),
   1933                       getI32Imm(toTypeWidth), Base, Offset, Chain };
   1934     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   1935   } else {
   1936     if (Subtarget.is64Bit()) {
   1937       switch (SourceVT) {
   1938       case MVT::i8:
   1939         Opcode = NVPTX::ST_i8_areg_64;
   1940         break;
   1941       case MVT::i16:
   1942         Opcode = NVPTX::ST_i16_areg_64;
   1943         break;
   1944       case MVT::i32:
   1945         Opcode = NVPTX::ST_i32_areg_64;
   1946         break;
   1947       case MVT::i64:
   1948         Opcode = NVPTX::ST_i64_areg_64;
   1949         break;
   1950       case MVT::f32:
   1951         Opcode = NVPTX::ST_f32_areg_64;
   1952         break;
   1953       case MVT::f64:
   1954         Opcode = NVPTX::ST_f64_areg_64;
   1955         break;
   1956       default:
   1957         return nullptr;
   1958       }
   1959     } else {
   1960       switch (SourceVT) {
   1961       case MVT::i8:
   1962         Opcode = NVPTX::ST_i8_areg;
   1963         break;
   1964       case MVT::i16:
   1965         Opcode = NVPTX::ST_i16_areg;
   1966         break;
   1967       case MVT::i32:
   1968         Opcode = NVPTX::ST_i32_areg;
   1969         break;
   1970       case MVT::i64:
   1971         Opcode = NVPTX::ST_i64_areg;
   1972         break;
   1973       case MVT::f32:
   1974         Opcode = NVPTX::ST_f32_areg;
   1975         break;
   1976       case MVT::f64:
   1977         Opcode = NVPTX::ST_f64_areg;
   1978         break;
   1979       default:
   1980         return nullptr;
   1981       }
   1982     }
   1983     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   1984                       getI32Imm(vecType), getI32Imm(toType),
   1985                       getI32Imm(toTypeWidth), N2, Chain };
   1986     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   1987   }
   1988 
   1989   if (NVPTXST) {
   1990     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1991     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1992     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1993   }
   1994 
   1995   return NVPTXST;
   1996 }
   1997 
   1998 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   1999   SDValue Chain = N->getOperand(0);
   2000   SDValue Op1 = N->getOperand(1);
   2001   SDValue Addr, Offset, Base;
   2002   unsigned Opcode;
   2003   SDLoc DL(N);
   2004   SDNode *ST;
   2005   EVT EltVT = Op1.getValueType();
   2006   MemSDNode *MemSD = cast<MemSDNode>(N);
   2007   EVT StoreVT = MemSD->getMemoryVT();
   2008 
   2009   // Address Space Setting
   2010   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
   2011 
   2012   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
   2013     report_fatal_error("Cannot store to pointer that points to constant "
   2014                        "memory space");
   2015   }
   2016 
   2017   // Volatile Setting
   2018   // - .volatile is only availalble for .global and .shared
   2019   bool IsVolatile = MemSD->isVolatile();
   2020   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2021       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2022       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2023     IsVolatile = false;
   2024 
   2025   // Type Setting: toType + toTypeWidth
   2026   // - for integer type, always use 'u'
   2027   assert(StoreVT.isSimple() && "Store value is not simple");
   2028   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
   2029   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   2030   unsigned ToType;
   2031   if (ScalarVT.isFloatingPoint())
   2032     ToType = NVPTX::PTXLdStInstCode::Float;
   2033   else
   2034     ToType = NVPTX::PTXLdStInstCode::Unsigned;
   2035 
   2036   SmallVector<SDValue, 12> StOps;
   2037   SDValue N2;
   2038   unsigned VecType;
   2039 
   2040   switch (N->getOpcode()) {
   2041   case NVPTXISD::StoreV2:
   2042     VecType = NVPTX::PTXLdStInstCode::V2;
   2043     StOps.push_back(N->getOperand(1));
   2044     StOps.push_back(N->getOperand(2));
   2045     N2 = N->getOperand(3);
   2046     break;
   2047   case NVPTXISD::StoreV4:
   2048     VecType = NVPTX::PTXLdStInstCode::V4;
   2049     StOps.push_back(N->getOperand(1));
   2050     StOps.push_back(N->getOperand(2));
   2051     StOps.push_back(N->getOperand(3));
   2052     StOps.push_back(N->getOperand(4));
   2053     N2 = N->getOperand(5);
   2054     break;
   2055   default:
   2056     return nullptr;
   2057   }
   2058 
   2059   StOps.push_back(getI32Imm(IsVolatile));
   2060   StOps.push_back(getI32Imm(CodeAddrSpace));
   2061   StOps.push_back(getI32Imm(VecType));
   2062   StOps.push_back(getI32Imm(ToType));
   2063   StOps.push_back(getI32Imm(ToTypeWidth));
   2064 
   2065   if (SelectDirectAddr(N2, Addr)) {
   2066     switch (N->getOpcode()) {
   2067     default:
   2068       return nullptr;
   2069     case NVPTXISD::StoreV2:
   2070       switch (EltVT.getSimpleVT().SimpleTy) {
   2071       default:
   2072         return nullptr;
   2073       case MVT::i8:
   2074         Opcode = NVPTX::STV_i8_v2_avar;
   2075         break;
   2076       case MVT::i16:
   2077         Opcode = NVPTX::STV_i16_v2_avar;
   2078         break;
   2079       case MVT::i32:
   2080         Opcode = NVPTX::STV_i32_v2_avar;
   2081         break;
   2082       case MVT::i64:
   2083         Opcode = NVPTX::STV_i64_v2_avar;
   2084         break;
   2085       case MVT::f32:
   2086         Opcode = NVPTX::STV_f32_v2_avar;
   2087         break;
   2088       case MVT::f64:
   2089         Opcode = NVPTX::STV_f64_v2_avar;
   2090         break;
   2091       }
   2092       break;
   2093     case NVPTXISD::StoreV4:
   2094       switch (EltVT.getSimpleVT().SimpleTy) {
   2095       default:
   2096         return nullptr;
   2097       case MVT::i8:
   2098         Opcode = NVPTX::STV_i8_v4_avar;
   2099         break;
   2100       case MVT::i16:
   2101         Opcode = NVPTX::STV_i16_v4_avar;
   2102         break;
   2103       case MVT::i32:
   2104         Opcode = NVPTX::STV_i32_v4_avar;
   2105         break;
   2106       case MVT::f32:
   2107         Opcode = NVPTX::STV_f32_v4_avar;
   2108         break;
   2109       }
   2110       break;
   2111     }
   2112     StOps.push_back(Addr);
   2113   } else if (Subtarget.is64Bit()
   2114                  ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2115                  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2116     switch (N->getOpcode()) {
   2117     default:
   2118       return nullptr;
   2119     case NVPTXISD::StoreV2:
   2120       switch (EltVT.getSimpleVT().SimpleTy) {
   2121       default:
   2122         return nullptr;
   2123       case MVT::i8:
   2124         Opcode = NVPTX::STV_i8_v2_asi;
   2125         break;
   2126       case MVT::i16:
   2127         Opcode = NVPTX::STV_i16_v2_asi;
   2128         break;
   2129       case MVT::i32:
   2130         Opcode = NVPTX::STV_i32_v2_asi;
   2131         break;
   2132       case MVT::i64:
   2133         Opcode = NVPTX::STV_i64_v2_asi;
   2134         break;
   2135       case MVT::f32:
   2136         Opcode = NVPTX::STV_f32_v2_asi;
   2137         break;
   2138       case MVT::f64:
   2139         Opcode = NVPTX::STV_f64_v2_asi;
   2140         break;
   2141       }
   2142       break;
   2143     case NVPTXISD::StoreV4:
   2144       switch (EltVT.getSimpleVT().SimpleTy) {
   2145       default:
   2146         return nullptr;
   2147       case MVT::i8:
   2148         Opcode = NVPTX::STV_i8_v4_asi;
   2149         break;
   2150       case MVT::i16:
   2151         Opcode = NVPTX::STV_i16_v4_asi;
   2152         break;
   2153       case MVT::i32:
   2154         Opcode = NVPTX::STV_i32_v4_asi;
   2155         break;
   2156       case MVT::f32:
   2157         Opcode = NVPTX::STV_f32_v4_asi;
   2158         break;
   2159       }
   2160       break;
   2161     }
   2162     StOps.push_back(Base);
   2163     StOps.push_back(Offset);
   2164   } else if (Subtarget.is64Bit()
   2165                  ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2166                  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2167     if (Subtarget.is64Bit()) {
   2168       switch (N->getOpcode()) {
   2169       default:
   2170         return nullptr;
   2171       case NVPTXISD::StoreV2:
   2172         switch (EltVT.getSimpleVT().SimpleTy) {
   2173         default:
   2174           return nullptr;
   2175         case MVT::i8:
   2176           Opcode = NVPTX::STV_i8_v2_ari_64;
   2177           break;
   2178         case MVT::i16:
   2179           Opcode = NVPTX::STV_i16_v2_ari_64;
   2180           break;
   2181         case MVT::i32:
   2182           Opcode = NVPTX::STV_i32_v2_ari_64;
   2183           break;
   2184         case MVT::i64:
   2185           Opcode = NVPTX::STV_i64_v2_ari_64;
   2186           break;
   2187         case MVT::f32:
   2188           Opcode = NVPTX::STV_f32_v2_ari_64;
   2189           break;
   2190         case MVT::f64:
   2191           Opcode = NVPTX::STV_f64_v2_ari_64;
   2192           break;
   2193         }
   2194         break;
   2195       case NVPTXISD::StoreV4:
   2196         switch (EltVT.getSimpleVT().SimpleTy) {
   2197         default:
   2198           return nullptr;
   2199         case MVT::i8:
   2200           Opcode = NVPTX::STV_i8_v4_ari_64;
   2201           break;
   2202         case MVT::i16:
   2203           Opcode = NVPTX::STV_i16_v4_ari_64;
   2204           break;
   2205         case MVT::i32:
   2206           Opcode = NVPTX::STV_i32_v4_ari_64;
   2207           break;
   2208         case MVT::f32:
   2209           Opcode = NVPTX::STV_f32_v4_ari_64;
   2210           break;
   2211         }
   2212         break;
   2213       }
   2214     } else {
   2215       switch (N->getOpcode()) {
   2216       default:
   2217         return nullptr;
   2218       case NVPTXISD::StoreV2:
   2219         switch (EltVT.getSimpleVT().SimpleTy) {
   2220         default:
   2221           return nullptr;
   2222         case MVT::i8:
   2223           Opcode = NVPTX::STV_i8_v2_ari;
   2224           break;
   2225         case MVT::i16:
   2226           Opcode = NVPTX::STV_i16_v2_ari;
   2227           break;
   2228         case MVT::i32:
   2229           Opcode = NVPTX::STV_i32_v2_ari;
   2230           break;
   2231         case MVT::i64:
   2232           Opcode = NVPTX::STV_i64_v2_ari;
   2233           break;
   2234         case MVT::f32:
   2235           Opcode = NVPTX::STV_f32_v2_ari;
   2236           break;
   2237         case MVT::f64:
   2238           Opcode = NVPTX::STV_f64_v2_ari;
   2239           break;
   2240         }
   2241         break;
   2242       case NVPTXISD::StoreV4:
   2243         switch (EltVT.getSimpleVT().SimpleTy) {
   2244         default:
   2245           return nullptr;
   2246         case MVT::i8:
   2247           Opcode = NVPTX::STV_i8_v4_ari;
   2248           break;
   2249         case MVT::i16:
   2250           Opcode = NVPTX::STV_i16_v4_ari;
   2251           break;
   2252         case MVT::i32:
   2253           Opcode = NVPTX::STV_i32_v4_ari;
   2254           break;
   2255         case MVT::f32:
   2256           Opcode = NVPTX::STV_f32_v4_ari;
   2257           break;
   2258         }
   2259         break;
   2260       }
   2261     }
   2262     StOps.push_back(Base);
   2263     StOps.push_back(Offset);
   2264   } else {
   2265     if (Subtarget.is64Bit()) {
   2266       switch (N->getOpcode()) {
   2267       default:
   2268         return nullptr;
   2269       case NVPTXISD::StoreV2:
   2270         switch (EltVT.getSimpleVT().SimpleTy) {
   2271         default:
   2272           return nullptr;
   2273         case MVT::i8:
   2274           Opcode = NVPTX::STV_i8_v2_areg_64;
   2275           break;
   2276         case MVT::i16:
   2277           Opcode = NVPTX::STV_i16_v2_areg_64;
   2278           break;
   2279         case MVT::i32:
   2280           Opcode = NVPTX::STV_i32_v2_areg_64;
   2281           break;
   2282         case MVT::i64:
   2283           Opcode = NVPTX::STV_i64_v2_areg_64;
   2284           break;
   2285         case MVT::f32:
   2286           Opcode = NVPTX::STV_f32_v2_areg_64;
   2287           break;
   2288         case MVT::f64:
   2289           Opcode = NVPTX::STV_f64_v2_areg_64;
   2290           break;
   2291         }
   2292         break;
   2293       case NVPTXISD::StoreV4:
   2294         switch (EltVT.getSimpleVT().SimpleTy) {
   2295         default:
   2296           return nullptr;
   2297         case MVT::i8:
   2298           Opcode = NVPTX::STV_i8_v4_areg_64;
   2299           break;
   2300         case MVT::i16:
   2301           Opcode = NVPTX::STV_i16_v4_areg_64;
   2302           break;
   2303         case MVT::i32:
   2304           Opcode = NVPTX::STV_i32_v4_areg_64;
   2305           break;
   2306         case MVT::f32:
   2307           Opcode = NVPTX::STV_f32_v4_areg_64;
   2308           break;
   2309         }
   2310         break;
   2311       }
   2312     } else {
   2313       switch (N->getOpcode()) {
   2314       default:
   2315         return nullptr;
   2316       case NVPTXISD::StoreV2:
   2317         switch (EltVT.getSimpleVT().SimpleTy) {
   2318         default:
   2319           return nullptr;
   2320         case MVT::i8:
   2321           Opcode = NVPTX::STV_i8_v2_areg;
   2322           break;
   2323         case MVT::i16:
   2324           Opcode = NVPTX::STV_i16_v2_areg;
   2325           break;
   2326         case MVT::i32:
   2327           Opcode = NVPTX::STV_i32_v2_areg;
   2328           break;
   2329         case MVT::i64:
   2330           Opcode = NVPTX::STV_i64_v2_areg;
   2331           break;
   2332         case MVT::f32:
   2333           Opcode = NVPTX::STV_f32_v2_areg;
   2334           break;
   2335         case MVT::f64:
   2336           Opcode = NVPTX::STV_f64_v2_areg;
   2337           break;
   2338         }
   2339         break;
   2340       case NVPTXISD::StoreV4:
   2341         switch (EltVT.getSimpleVT().SimpleTy) {
   2342         default:
   2343           return nullptr;
   2344         case MVT::i8:
   2345           Opcode = NVPTX::STV_i8_v4_areg;
   2346           break;
   2347         case MVT::i16:
   2348           Opcode = NVPTX::STV_i16_v4_areg;
   2349           break;
   2350         case MVT::i32:
   2351           Opcode = NVPTX::STV_i32_v4_areg;
   2352           break;
   2353         case MVT::f32:
   2354           Opcode = NVPTX::STV_f32_v4_areg;
   2355           break;
   2356         }
   2357         break;
   2358       }
   2359     }
   2360     StOps.push_back(N2);
   2361   }
   2362 
   2363   StOps.push_back(Chain);
   2364 
   2365   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
   2366 
   2367   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2368   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2369   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2370 
   2371   return ST;
   2372 }
   2373 
   2374 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
   2375   SDValue Chain = Node->getOperand(0);
   2376   SDValue Offset = Node->getOperand(2);
   2377   SDValue Flag = Node->getOperand(3);
   2378   SDLoc DL(Node);
   2379   MemSDNode *Mem = cast<MemSDNode>(Node);
   2380 
   2381   unsigned VecSize;
   2382   switch (Node->getOpcode()) {
   2383   default:
   2384     return nullptr;
   2385   case NVPTXISD::LoadParam:
   2386     VecSize = 1;
   2387     break;
   2388   case NVPTXISD::LoadParamV2:
   2389     VecSize = 2;
   2390     break;
   2391   case NVPTXISD::LoadParamV4:
   2392     VecSize = 4;
   2393     break;
   2394   }
   2395 
   2396   EVT EltVT = Node->getValueType(0);
   2397   EVT MemVT = Mem->getMemoryVT();
   2398 
   2399   unsigned Opc = 0;
   2400 
   2401   switch (VecSize) {
   2402   default:
   2403     return nullptr;
   2404   case 1:
   2405     switch (MemVT.getSimpleVT().SimpleTy) {
   2406     default:
   2407       return nullptr;
   2408     case MVT::i1:
   2409       Opc = NVPTX::LoadParamMemI8;
   2410       break;
   2411     case MVT::i8:
   2412       Opc = NVPTX::LoadParamMemI8;
   2413       break;
   2414     case MVT::i16:
   2415       Opc = NVPTX::LoadParamMemI16;
   2416       break;
   2417     case MVT::i32:
   2418       Opc = NVPTX::LoadParamMemI32;
   2419       break;
   2420     case MVT::i64:
   2421       Opc = NVPTX::LoadParamMemI64;
   2422       break;
   2423     case MVT::f32:
   2424       Opc = NVPTX::LoadParamMemF32;
   2425       break;
   2426     case MVT::f64:
   2427       Opc = NVPTX::LoadParamMemF64;
   2428       break;
   2429     }
   2430     break;
   2431   case 2:
   2432     switch (MemVT.getSimpleVT().SimpleTy) {
   2433     default:
   2434       return nullptr;
   2435     case MVT::i1:
   2436       Opc = NVPTX::LoadParamMemV2I8;
   2437       break;
   2438     case MVT::i8:
   2439       Opc = NVPTX::LoadParamMemV2I8;
   2440       break;
   2441     case MVT::i16:
   2442       Opc = NVPTX::LoadParamMemV2I16;
   2443       break;
   2444     case MVT::i32:
   2445       Opc = NVPTX::LoadParamMemV2I32;
   2446       break;
   2447     case MVT::i64:
   2448       Opc = NVPTX::LoadParamMemV2I64;
   2449       break;
   2450     case MVT::f32:
   2451       Opc = NVPTX::LoadParamMemV2F32;
   2452       break;
   2453     case MVT::f64:
   2454       Opc = NVPTX::LoadParamMemV2F64;
   2455       break;
   2456     }
   2457     break;
   2458   case 4:
   2459     switch (MemVT.getSimpleVT().SimpleTy) {
   2460     default:
   2461       return nullptr;
   2462     case MVT::i1:
   2463       Opc = NVPTX::LoadParamMemV4I8;
   2464       break;
   2465     case MVT::i8:
   2466       Opc = NVPTX::LoadParamMemV4I8;
   2467       break;
   2468     case MVT::i16:
   2469       Opc = NVPTX::LoadParamMemV4I16;
   2470       break;
   2471     case MVT::i32:
   2472       Opc = NVPTX::LoadParamMemV4I32;
   2473       break;
   2474     case MVT::f32:
   2475       Opc = NVPTX::LoadParamMemV4F32;
   2476       break;
   2477     }
   2478     break;
   2479   }
   2480 
   2481   SDVTList VTs;
   2482   if (VecSize == 1) {
   2483     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
   2484   } else if (VecSize == 2) {
   2485     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
   2486   } else {
   2487     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
   2488     VTs = CurDAG->getVTList(EVTs);
   2489   }
   2490 
   2491   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2492 
   2493   SmallVector<SDValue, 2> Ops;
   2494   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2495   Ops.push_back(Chain);
   2496   Ops.push_back(Flag);
   2497 
   2498   SDNode *Ret =
   2499       CurDAG->getMachineNode(Opc, DL, VTs, Ops);
   2500   return Ret;
   2501 }
   2502 
   2503 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
   2504   SDLoc DL(N);
   2505   SDValue Chain = N->getOperand(0);
   2506   SDValue Offset = N->getOperand(1);
   2507   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2508   MemSDNode *Mem = cast<MemSDNode>(N);
   2509 
   2510   // How many elements do we have?
   2511   unsigned NumElts = 1;
   2512   switch (N->getOpcode()) {
   2513   default:
   2514     return nullptr;
   2515   case NVPTXISD::StoreRetval:
   2516     NumElts = 1;
   2517     break;
   2518   case NVPTXISD::StoreRetvalV2:
   2519     NumElts = 2;
   2520     break;
   2521   case NVPTXISD::StoreRetvalV4:
   2522     NumElts = 4;
   2523     break;
   2524   }
   2525 
   2526   // Build vector of operands
   2527   SmallVector<SDValue, 6> Ops;
   2528   for (unsigned i = 0; i < NumElts; ++i)
   2529     Ops.push_back(N->getOperand(i + 2));
   2530   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2531   Ops.push_back(Chain);
   2532 
   2533   // Determine target opcode
   2534   // If we have an i1, use an 8-bit store. The lowering code in
   2535   // NVPTXISelLowering will have already emitted an upcast.
   2536   unsigned Opcode = 0;
   2537   switch (NumElts) {
   2538   default:
   2539     return nullptr;
   2540   case 1:
   2541     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2542     default:
   2543       return nullptr;
   2544     case MVT::i1:
   2545       Opcode = NVPTX::StoreRetvalI8;
   2546       break;
   2547     case MVT::i8:
   2548       Opcode = NVPTX::StoreRetvalI8;
   2549       break;
   2550     case MVT::i16:
   2551       Opcode = NVPTX::StoreRetvalI16;
   2552       break;
   2553     case MVT::i32:
   2554       Opcode = NVPTX::StoreRetvalI32;
   2555       break;
   2556     case MVT::i64:
   2557       Opcode = NVPTX::StoreRetvalI64;
   2558       break;
   2559     case MVT::f32:
   2560       Opcode = NVPTX::StoreRetvalF32;
   2561       break;
   2562     case MVT::f64:
   2563       Opcode = NVPTX::StoreRetvalF64;
   2564       break;
   2565     }
   2566     break;
   2567   case 2:
   2568     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2569     default:
   2570       return nullptr;
   2571     case MVT::i1:
   2572       Opcode = NVPTX::StoreRetvalV2I8;
   2573       break;
   2574     case MVT::i8:
   2575       Opcode = NVPTX::StoreRetvalV2I8;
   2576       break;
   2577     case MVT::i16:
   2578       Opcode = NVPTX::StoreRetvalV2I16;
   2579       break;
   2580     case MVT::i32:
   2581       Opcode = NVPTX::StoreRetvalV2I32;
   2582       break;
   2583     case MVT::i64:
   2584       Opcode = NVPTX::StoreRetvalV2I64;
   2585       break;
   2586     case MVT::f32:
   2587       Opcode = NVPTX::StoreRetvalV2F32;
   2588       break;
   2589     case MVT::f64:
   2590       Opcode = NVPTX::StoreRetvalV2F64;
   2591       break;
   2592     }
   2593     break;
   2594   case 4:
   2595     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2596     default:
   2597       return nullptr;
   2598     case MVT::i1:
   2599       Opcode = NVPTX::StoreRetvalV4I8;
   2600       break;
   2601     case MVT::i8:
   2602       Opcode = NVPTX::StoreRetvalV4I8;
   2603       break;
   2604     case MVT::i16:
   2605       Opcode = NVPTX::StoreRetvalV4I16;
   2606       break;
   2607     case MVT::i32:
   2608       Opcode = NVPTX::StoreRetvalV4I32;
   2609       break;
   2610     case MVT::f32:
   2611       Opcode = NVPTX::StoreRetvalV4F32;
   2612       break;
   2613     }
   2614     break;
   2615   }
   2616 
   2617   SDNode *Ret =
   2618       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   2619   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2620   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2621   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2622 
   2623   return Ret;
   2624 }
   2625 
   2626 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
   2627   SDLoc DL(N);
   2628   SDValue Chain = N->getOperand(0);
   2629   SDValue Param = N->getOperand(1);
   2630   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
   2631   SDValue Offset = N->getOperand(2);
   2632   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2633   MemSDNode *Mem = cast<MemSDNode>(N);
   2634   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
   2635 
   2636   // How many elements do we have?
   2637   unsigned NumElts = 1;
   2638   switch (N->getOpcode()) {
   2639   default:
   2640     return nullptr;
   2641   case NVPTXISD::StoreParamU32:
   2642   case NVPTXISD::StoreParamS32:
   2643   case NVPTXISD::StoreParam:
   2644     NumElts = 1;
   2645     break;
   2646   case NVPTXISD::StoreParamV2:
   2647     NumElts = 2;
   2648     break;
   2649   case NVPTXISD::StoreParamV4:
   2650     NumElts = 4;
   2651     break;
   2652   }
   2653 
   2654   // Build vector of operands
   2655   SmallVector<SDValue, 8> Ops;
   2656   for (unsigned i = 0; i < NumElts; ++i)
   2657     Ops.push_back(N->getOperand(i + 3));
   2658   Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
   2659   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2660   Ops.push_back(Chain);
   2661   Ops.push_back(Flag);
   2662 
   2663   // Determine target opcode
   2664   // If we have an i1, use an 8-bit store. The lowering code in
   2665   // NVPTXISelLowering will have already emitted an upcast.
   2666   unsigned Opcode = 0;
   2667   switch (N->getOpcode()) {
   2668   default:
   2669     switch (NumElts) {
   2670     default:
   2671       return nullptr;
   2672     case 1:
   2673       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2674       default:
   2675         return nullptr;
   2676       case MVT::i1:
   2677         Opcode = NVPTX::StoreParamI8;
   2678         break;
   2679       case MVT::i8:
   2680         Opcode = NVPTX::StoreParamI8;
   2681         break;
   2682       case MVT::i16:
   2683         Opcode = NVPTX::StoreParamI16;
   2684         break;
   2685       case MVT::i32:
   2686         Opcode = NVPTX::StoreParamI32;
   2687         break;
   2688       case MVT::i64:
   2689         Opcode = NVPTX::StoreParamI64;
   2690         break;
   2691       case MVT::f32:
   2692         Opcode = NVPTX::StoreParamF32;
   2693         break;
   2694       case MVT::f64:
   2695         Opcode = NVPTX::StoreParamF64;
   2696         break;
   2697       }
   2698       break;
   2699     case 2:
   2700       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2701       default:
   2702         return nullptr;
   2703       case MVT::i1:
   2704         Opcode = NVPTX::StoreParamV2I8;
   2705         break;
   2706       case MVT::i8:
   2707         Opcode = NVPTX::StoreParamV2I8;
   2708         break;
   2709       case MVT::i16:
   2710         Opcode = NVPTX::StoreParamV2I16;
   2711         break;
   2712       case MVT::i32:
   2713         Opcode = NVPTX::StoreParamV2I32;
   2714         break;
   2715       case MVT::i64:
   2716         Opcode = NVPTX::StoreParamV2I64;
   2717         break;
   2718       case MVT::f32:
   2719         Opcode = NVPTX::StoreParamV2F32;
   2720         break;
   2721       case MVT::f64:
   2722         Opcode = NVPTX::StoreParamV2F64;
   2723         break;
   2724       }
   2725       break;
   2726     case 4:
   2727       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2728       default:
   2729         return nullptr;
   2730       case MVT::i1:
   2731         Opcode = NVPTX::StoreParamV4I8;
   2732         break;
   2733       case MVT::i8:
   2734         Opcode = NVPTX::StoreParamV4I8;
   2735         break;
   2736       case MVT::i16:
   2737         Opcode = NVPTX::StoreParamV4I16;
   2738         break;
   2739       case MVT::i32:
   2740         Opcode = NVPTX::StoreParamV4I32;
   2741         break;
   2742       case MVT::f32:
   2743         Opcode = NVPTX::StoreParamV4F32;
   2744         break;
   2745       }
   2746       break;
   2747     }
   2748     break;
   2749   // Special case: if we have a sign-extend/zero-extend node, insert the
   2750   // conversion instruction first, and use that as the value operand to
   2751   // the selected StoreParam node.
   2752   case NVPTXISD::StoreParamU32: {
   2753     Opcode = NVPTX::StoreParamI32;
   2754     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
   2755                                                 MVT::i32);
   2756     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
   2757                                          MVT::i32, Ops[0], CvtNone);
   2758     Ops[0] = SDValue(Cvt, 0);
   2759     break;
   2760   }
   2761   case NVPTXISD::StoreParamS32: {
   2762     Opcode = NVPTX::StoreParamI32;
   2763     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
   2764                                                 MVT::i32);
   2765     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
   2766                                          MVT::i32, Ops[0], CvtNone);
   2767     Ops[0] = SDValue(Cvt, 0);
   2768     break;
   2769   }
   2770   }
   2771 
   2772   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
   2773   SDNode *Ret =
   2774       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
   2775   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2776   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2777   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2778 
   2779   return Ret;
   2780 }
   2781 
   2782 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
   2783   SDValue Chain = N->getOperand(0);
   2784   SDValue TexRef = N->getOperand(1);
   2785   SDValue SampRef = N->getOperand(2);
   2786   SDNode *Ret = nullptr;
   2787   unsigned Opc = 0;
   2788   SmallVector<SDValue, 8> Ops;
   2789 
   2790   switch (N->getOpcode()) {
   2791   default: return nullptr;
   2792   case NVPTXISD::Tex1DFloatI32:
   2793     Opc = NVPTX::TEX_1D_F32_I32;
   2794     break;
   2795   case NVPTXISD::Tex1DFloatFloat:
   2796     Opc = NVPTX::TEX_1D_F32_F32;
   2797     break;
   2798   case NVPTXISD::Tex1DFloatFloatLevel:
   2799     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
   2800     break;
   2801   case NVPTXISD::Tex1DFloatFloatGrad:
   2802     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
   2803     break;
   2804   case NVPTXISD::Tex1DI32I32:
   2805     Opc = NVPTX::TEX_1D_I32_I32;
   2806     break;
   2807   case NVPTXISD::Tex1DI32Float:
   2808     Opc = NVPTX::TEX_1D_I32_F32;
   2809     break;
   2810   case NVPTXISD::Tex1DI32FloatLevel:
   2811     Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
   2812     break;
   2813   case NVPTXISD::Tex1DI32FloatGrad:
   2814     Opc = NVPTX::TEX_1D_I32_F32_GRAD;
   2815     break;
   2816   case NVPTXISD::Tex1DArrayFloatI32:
   2817     Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
   2818     break;
   2819   case NVPTXISD::Tex1DArrayFloatFloat:
   2820     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
   2821     break;
   2822   case NVPTXISD::Tex1DArrayFloatFloatLevel:
   2823     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
   2824     break;
   2825   case NVPTXISD::Tex1DArrayFloatFloatGrad:
   2826     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
   2827     break;
   2828   case NVPTXISD::Tex1DArrayI32I32:
   2829     Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
   2830     break;
   2831   case NVPTXISD::Tex1DArrayI32Float:
   2832     Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
   2833     break;
   2834   case NVPTXISD::Tex1DArrayI32FloatLevel:
   2835     Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
   2836     break;
   2837   case NVPTXISD::Tex1DArrayI32FloatGrad:
   2838     Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
   2839     break;
   2840   case NVPTXISD::Tex2DFloatI32:
   2841     Opc = NVPTX::TEX_2D_F32_I32;
   2842     break;
   2843   case NVPTXISD::Tex2DFloatFloat:
   2844     Opc = NVPTX::TEX_2D_F32_F32;
   2845     break;
   2846   case NVPTXISD::Tex2DFloatFloatLevel:
   2847     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
   2848     break;
   2849   case NVPTXISD::Tex2DFloatFloatGrad:
   2850     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
   2851     break;
   2852   case NVPTXISD::Tex2DI32I32:
   2853     Opc = NVPTX::TEX_2D_I32_I32;
   2854     break;
   2855   case NVPTXISD::Tex2DI32Float:
   2856     Opc = NVPTX::TEX_2D_I32_F32;
   2857     break;
   2858   case NVPTXISD::Tex2DI32FloatLevel:
   2859     Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
   2860     break;
   2861   case NVPTXISD::Tex2DI32FloatGrad:
   2862     Opc = NVPTX::TEX_2D_I32_F32_GRAD;
   2863     break;
   2864   case NVPTXISD::Tex2DArrayFloatI32:
   2865     Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
   2866     break;
   2867   case NVPTXISD::Tex2DArrayFloatFloat:
   2868     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
   2869     break;
   2870   case NVPTXISD::Tex2DArrayFloatFloatLevel:
   2871     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
   2872     break;
   2873   case NVPTXISD::Tex2DArrayFloatFloatGrad:
   2874     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
   2875     break;
   2876   case NVPTXISD::Tex2DArrayI32I32:
   2877     Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
   2878     break;
   2879   case NVPTXISD::Tex2DArrayI32Float:
   2880     Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
   2881     break;
   2882   case NVPTXISD::Tex2DArrayI32FloatLevel:
   2883     Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
   2884     break;
   2885   case NVPTXISD::Tex2DArrayI32FloatGrad:
   2886     Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
   2887     break;
   2888   case NVPTXISD::Tex3DFloatI32:
   2889     Opc = NVPTX::TEX_3D_F32_I32;
   2890     break;
   2891   case NVPTXISD::Tex3DFloatFloat:
   2892     Opc = NVPTX::TEX_3D_F32_F32;
   2893     break;
   2894   case NVPTXISD::Tex3DFloatFloatLevel:
   2895     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
   2896     break;
   2897   case NVPTXISD::Tex3DFloatFloatGrad:
   2898     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
   2899     break;
   2900   case NVPTXISD::Tex3DI32I32:
   2901     Opc = NVPTX::TEX_3D_I32_I32;
   2902     break;
   2903   case NVPTXISD::Tex3DI32Float:
   2904     Opc = NVPTX::TEX_3D_I32_F32;
   2905     break;
   2906   case NVPTXISD::Tex3DI32FloatLevel:
   2907     Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
   2908     break;
   2909   case NVPTXISD::Tex3DI32FloatGrad:
   2910     Opc = NVPTX::TEX_3D_I32_F32_GRAD;
   2911     break;
   2912   }
   2913 
   2914   Ops.push_back(TexRef);
   2915   Ops.push_back(SampRef);
   2916 
   2917   // Copy over indices
   2918   for (unsigned i = 3; i < N->getNumOperands(); ++i) {
   2919     Ops.push_back(N->getOperand(i));
   2920   }
   2921 
   2922   Ops.push_back(Chain);
   2923   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   2924   return Ret;
   2925 }
   2926 
   2927 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
   2928   SDValue Chain = N->getOperand(0);
   2929   SDValue TexHandle = N->getOperand(1);
   2930   SDNode *Ret = nullptr;
   2931   unsigned Opc = 0;
   2932   SmallVector<SDValue, 8> Ops;
   2933   switch (N->getOpcode()) {
   2934   default: return nullptr;
   2935   case NVPTXISD::Suld1DI8Trap:
   2936     Opc = NVPTX::SULD_1D_I8_TRAP;
   2937     Ops.push_back(TexHandle);
   2938     Ops.push_back(N->getOperand(2));
   2939     Ops.push_back(Chain);
   2940     break;
   2941   case NVPTXISD::Suld1DI16Trap:
   2942     Opc = NVPTX::SULD_1D_I16_TRAP;
   2943     Ops.push_back(TexHandle);
   2944     Ops.push_back(N->getOperand(2));
   2945     Ops.push_back(Chain);
   2946     break;
   2947   case NVPTXISD::Suld1DI32Trap:
   2948     Opc = NVPTX::SULD_1D_I32_TRAP;
   2949     Ops.push_back(TexHandle);
   2950     Ops.push_back(N->getOperand(2));
   2951     Ops.push_back(Chain);
   2952     break;
   2953   case NVPTXISD::Suld1DV2I8Trap:
   2954     Opc = NVPTX::SULD_1D_V2I8_TRAP;
   2955     Ops.push_back(TexHandle);
   2956     Ops.push_back(N->getOperand(2));
   2957     Ops.push_back(Chain);
   2958     break;
   2959   case NVPTXISD::Suld1DV2I16Trap:
   2960     Opc = NVPTX::SULD_1D_V2I16_TRAP;
   2961     Ops.push_back(TexHandle);
   2962     Ops.push_back(N->getOperand(2));
   2963     Ops.push_back(Chain);
   2964     break;
   2965   case NVPTXISD::Suld1DV2I32Trap:
   2966     Opc = NVPTX::SULD_1D_V2I32_TRAP;
   2967     Ops.push_back(TexHandle);
   2968     Ops.push_back(N->getOperand(2));
   2969     Ops.push_back(Chain);
   2970     break;
   2971   case NVPTXISD::Suld1DV4I8Trap:
   2972     Opc = NVPTX::SULD_1D_V4I8_TRAP;
   2973     Ops.push_back(TexHandle);
   2974     Ops.push_back(N->getOperand(2));
   2975     Ops.push_back(Chain);
   2976     break;
   2977   case NVPTXISD::Suld1DV4I16Trap:
   2978     Opc = NVPTX::SULD_1D_V4I16_TRAP;
   2979     Ops.push_back(TexHandle);
   2980     Ops.push_back(N->getOperand(2));
   2981     Ops.push_back(Chain);
   2982     break;
   2983   case NVPTXISD::Suld1DV4I32Trap:
   2984     Opc = NVPTX::SULD_1D_V4I32_TRAP;
   2985     Ops.push_back(TexHandle);
   2986     Ops.push_back(N->getOperand(2));
   2987     Ops.push_back(Chain);
   2988     break;
   2989   case NVPTXISD::Suld1DArrayI8Trap:
   2990     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
   2991     Ops.push_back(TexHandle);
   2992     Ops.push_back(N->getOperand(2));
   2993     Ops.push_back(N->getOperand(3));
   2994     Ops.push_back(Chain);
   2995     break;
   2996   case NVPTXISD::Suld1DArrayI16Trap:
   2997     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
   2998     Ops.push_back(TexHandle);
   2999     Ops.push_back(N->getOperand(2));
   3000     Ops.push_back(N->getOperand(3));
   3001     Ops.push_back(Chain);
   3002     break;
   3003   case NVPTXISD::Suld1DArrayI32Trap:
   3004     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
   3005     Ops.push_back(TexHandle);
   3006     Ops.push_back(N->getOperand(2));
   3007     Ops.push_back(N->getOperand(3));
   3008     Ops.push_back(Chain);
   3009     break;
   3010   case NVPTXISD::Suld1DArrayV2I8Trap:
   3011     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
   3012     Ops.push_back(TexHandle);
   3013     Ops.push_back(N->getOperand(2));
   3014     Ops.push_back(N->getOperand(3));
   3015     Ops.push_back(Chain);
   3016     break;
   3017   case NVPTXISD::Suld1DArrayV2I16Trap:
   3018     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
   3019     Ops.push_back(TexHandle);
   3020     Ops.push_back(N->getOperand(2));
   3021     Ops.push_back(N->getOperand(3));
   3022     Ops.push_back(Chain);
   3023     break;
   3024   case NVPTXISD::Suld1DArrayV2I32Trap:
   3025     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
   3026     Ops.push_back(TexHandle);
   3027     Ops.push_back(N->getOperand(2));
   3028     Ops.push_back(N->getOperand(3));
   3029     Ops.push_back(Chain);
   3030     break;
   3031   case NVPTXISD::Suld1DArrayV4I8Trap:
   3032     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
   3033     Ops.push_back(TexHandle);
   3034     Ops.push_back(N->getOperand(2));
   3035     Ops.push_back(N->getOperand(3));
   3036     Ops.push_back(Chain);
   3037     break;
   3038   case NVPTXISD::Suld1DArrayV4I16Trap:
   3039     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
   3040     Ops.push_back(TexHandle);
   3041     Ops.push_back(N->getOperand(2));
   3042     Ops.push_back(N->getOperand(3));
   3043     Ops.push_back(Chain);
   3044     break;
   3045   case NVPTXISD::Suld1DArrayV4I32Trap:
   3046     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
   3047     Ops.push_back(TexHandle);
   3048     Ops.push_back(N->getOperand(2));
   3049     Ops.push_back(N->getOperand(3));
   3050     Ops.push_back(Chain);
   3051     break;
   3052   case NVPTXISD::Suld2DI8Trap:
   3053     Opc = NVPTX::SULD_2D_I8_TRAP;
   3054     Ops.push_back(TexHandle);
   3055     Ops.push_back(N->getOperand(2));
   3056     Ops.push_back(N->getOperand(3));
   3057     Ops.push_back(Chain);
   3058     break;
   3059   case NVPTXISD::Suld2DI16Trap:
   3060     Opc = NVPTX::SULD_2D_I16_TRAP;
   3061     Ops.push_back(TexHandle);
   3062     Ops.push_back(N->getOperand(2));
   3063     Ops.push_back(N->getOperand(3));
   3064     Ops.push_back(Chain);
   3065     break;
   3066   case NVPTXISD::Suld2DI32Trap:
   3067     Opc = NVPTX::SULD_2D_I32_TRAP;
   3068     Ops.push_back(TexHandle);
   3069     Ops.push_back(N->getOperand(2));
   3070     Ops.push_back(N->getOperand(3));
   3071     Ops.push_back(Chain);
   3072     break;
   3073   case NVPTXISD::Suld2DV2I8Trap:
   3074     Opc = NVPTX::SULD_2D_V2I8_TRAP;
   3075     Ops.push_back(TexHandle);
   3076     Ops.push_back(N->getOperand(2));
   3077     Ops.push_back(N->getOperand(3));
   3078     Ops.push_back(Chain);
   3079     break;
   3080   case NVPTXISD::Suld2DV2I16Trap:
   3081     Opc = NVPTX::SULD_2D_V2I16_TRAP;
   3082     Ops.push_back(TexHandle);
   3083     Ops.push_back(N->getOperand(2));
   3084     Ops.push_back(N->getOperand(3));
   3085     Ops.push_back(Chain);
   3086     break;
   3087   case NVPTXISD::Suld2DV2I32Trap:
   3088     Opc = NVPTX::SULD_2D_V2I32_TRAP;
   3089     Ops.push_back(TexHandle);
   3090     Ops.push_back(N->getOperand(2));
   3091     Ops.push_back(N->getOperand(3));
   3092     Ops.push_back(Chain);
   3093     break;
   3094   case NVPTXISD::Suld2DV4I8Trap:
   3095     Opc = NVPTX::SULD_2D_V4I8_TRAP;
   3096     Ops.push_back(TexHandle);
   3097     Ops.push_back(N->getOperand(2));
   3098     Ops.push_back(N->getOperand(3));
   3099     Ops.push_back(Chain);
   3100     break;
   3101   case NVPTXISD::Suld2DV4I16Trap:
   3102     Opc = NVPTX::SULD_2D_V4I16_TRAP;
   3103     Ops.push_back(TexHandle);
   3104     Ops.push_back(N->getOperand(2));
   3105     Ops.push_back(N->getOperand(3));
   3106     Ops.push_back(Chain);
   3107     break;
   3108   case NVPTXISD::Suld2DV4I32Trap:
   3109     Opc = NVPTX::SULD_2D_V4I32_TRAP;
   3110     Ops.push_back(TexHandle);
   3111     Ops.push_back(N->getOperand(2));
   3112     Ops.push_back(N->getOperand(3));
   3113     Ops.push_back(Chain);
   3114     break;
   3115   case NVPTXISD::Suld2DArrayI8Trap:
   3116     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
   3117     Ops.push_back(TexHandle);
   3118     Ops.push_back(N->getOperand(2));
   3119     Ops.push_back(N->getOperand(3));
   3120     Ops.push_back(N->getOperand(4));
   3121     Ops.push_back(Chain);
   3122     break;
   3123   case NVPTXISD::Suld2DArrayI16Trap:
   3124     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
   3125     Ops.push_back(TexHandle);
   3126     Ops.push_back(N->getOperand(2));
   3127     Ops.push_back(N->getOperand(3));
   3128     Ops.push_back(N->getOperand(4));
   3129     Ops.push_back(Chain);
   3130     break;
   3131   case NVPTXISD::Suld2DArrayI32Trap:
   3132     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
   3133     Ops.push_back(TexHandle);
   3134     Ops.push_back(N->getOperand(2));
   3135     Ops.push_back(N->getOperand(3));
   3136     Ops.push_back(N->getOperand(4));
   3137     Ops.push_back(Chain);
   3138     break;
   3139   case NVPTXISD::Suld2DArrayV2I8Trap:
   3140     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
   3141     Ops.push_back(TexHandle);
   3142     Ops.push_back(N->getOperand(2));
   3143     Ops.push_back(N->getOperand(3));
   3144     Ops.push_back(N->getOperand(4));
   3145     Ops.push_back(Chain);
   3146     break;
   3147   case NVPTXISD::Suld2DArrayV2I16Trap:
   3148     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
   3149     Ops.push_back(TexHandle);
   3150     Ops.push_back(N->getOperand(2));
   3151     Ops.push_back(N->getOperand(3));
   3152     Ops.push_back(N->getOperand(4));
   3153     Ops.push_back(Chain);
   3154     break;
   3155   case NVPTXISD::Suld2DArrayV2I32Trap:
   3156     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
   3157     Ops.push_back(TexHandle);
   3158     Ops.push_back(N->getOperand(2));
   3159     Ops.push_back(N->getOperand(3));
   3160     Ops.push_back(N->getOperand(4));
   3161     Ops.push_back(Chain);
   3162     break;
   3163   case NVPTXISD::Suld2DArrayV4I8Trap:
   3164     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
   3165     Ops.push_back(TexHandle);
   3166     Ops.push_back(N->getOperand(2));
   3167     Ops.push_back(N->getOperand(3));
   3168     Ops.push_back(N->getOperand(4));
   3169     Ops.push_back(Chain);
   3170     break;
   3171   case NVPTXISD::Suld2DArrayV4I16Trap:
   3172     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
   3173     Ops.push_back(TexHandle);
   3174     Ops.push_back(N->getOperand(2));
   3175     Ops.push_back(N->getOperand(3));
   3176     Ops.push_back(N->getOperand(4));
   3177     Ops.push_back(Chain);
   3178     break;
   3179   case NVPTXISD::Suld2DArrayV4I32Trap:
   3180     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
   3181     Ops.push_back(TexHandle);
   3182     Ops.push_back(N->getOperand(2));
   3183     Ops.push_back(N->getOperand(3));
   3184     Ops.push_back(N->getOperand(4));
   3185     Ops.push_back(Chain);
   3186     break;
   3187   case NVPTXISD::Suld3DI8Trap:
   3188     Opc = NVPTX::SULD_3D_I8_TRAP;
   3189     Ops.push_back(TexHandle);
   3190     Ops.push_back(N->getOperand(2));
   3191     Ops.push_back(N->getOperand(3));
   3192     Ops.push_back(N->getOperand(4));
   3193     Ops.push_back(Chain);
   3194     break;
   3195   case NVPTXISD::Suld3DI16Trap:
   3196     Opc = NVPTX::SULD_3D_I16_TRAP;
   3197     Ops.push_back(TexHandle);
   3198     Ops.push_back(N->getOperand(2));
   3199     Ops.push_back(N->getOperand(3));
   3200     Ops.push_back(N->getOperand(4));
   3201     Ops.push_back(Chain);
   3202     break;
   3203   case NVPTXISD::Suld3DI32Trap:
   3204     Opc = NVPTX::SULD_3D_I32_TRAP;
   3205     Ops.push_back(TexHandle);
   3206     Ops.push_back(N->getOperand(2));
   3207     Ops.push_back(N->getOperand(3));
   3208     Ops.push_back(N->getOperand(4));
   3209     Ops.push_back(Chain);
   3210     break;
   3211   case NVPTXISD::Suld3DV2I8Trap:
   3212     Opc = NVPTX::SULD_3D_V2I8_TRAP;
   3213     Ops.push_back(TexHandle);
   3214     Ops.push_back(N->getOperand(2));
   3215     Ops.push_back(N->getOperand(3));
   3216     Ops.push_back(N->getOperand(4));
   3217     Ops.push_back(Chain);
   3218     break;
   3219   case NVPTXISD::Suld3DV2I16Trap:
   3220     Opc = NVPTX::SULD_3D_V2I16_TRAP;
   3221     Ops.push_back(TexHandle);
   3222     Ops.push_back(N->getOperand(2));
   3223     Ops.push_back(N->getOperand(3));
   3224     Ops.push_back(N->getOperand(4));
   3225     Ops.push_back(Chain);
   3226     break;
   3227   case NVPTXISD::Suld3DV2I32Trap:
   3228     Opc = NVPTX::SULD_3D_V2I32_TRAP;
   3229     Ops.push_back(TexHandle);
   3230     Ops.push_back(N->getOperand(2));
   3231     Ops.push_back(N->getOperand(3));
   3232     Ops.push_back(N->getOperand(4));
   3233     Ops.push_back(Chain);
   3234     break;
   3235   case NVPTXISD::Suld3DV4I8Trap:
   3236     Opc = NVPTX::SULD_3D_V4I8_TRAP;
   3237     Ops.push_back(TexHandle);
   3238     Ops.push_back(N->getOperand(2));
   3239     Ops.push_back(N->getOperand(3));
   3240     Ops.push_back(N->getOperand(4));
   3241     Ops.push_back(Chain);
   3242     break;
   3243   case NVPTXISD::Suld3DV4I16Trap:
   3244     Opc = NVPTX::SULD_3D_V4I16_TRAP;
   3245     Ops.push_back(TexHandle);
   3246     Ops.push_back(N->getOperand(2));
   3247     Ops.push_back(N->getOperand(3));
   3248     Ops.push_back(N->getOperand(4));
   3249     Ops.push_back(Chain);
   3250     break;
   3251   case NVPTXISD::Suld3DV4I32Trap:
   3252     Opc = NVPTX::SULD_3D_V4I32_TRAP;
   3253     Ops.push_back(TexHandle);
   3254     Ops.push_back(N->getOperand(2));
   3255     Ops.push_back(N->getOperand(3));
   3256     Ops.push_back(N->getOperand(4));
   3257     Ops.push_back(Chain);
   3258     break;
   3259   }
   3260   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   3261   return Ret;
   3262 }
   3263 
   3264 /// SelectBFE - Look for instruction sequences that can be made more efficient
   3265 /// by using the 'bfe' (bit-field extract) PTX instruction
   3266 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
   3267   SDValue LHS = N->getOperand(0);
   3268   SDValue RHS = N->getOperand(1);
   3269   SDValue Len;
   3270   SDValue Start;
   3271   SDValue Val;
   3272   bool IsSigned = false;
   3273 
   3274   if (N->getOpcode() == ISD::AND) {
   3275     // Canonicalize the operands
   3276     // We want 'and %val, %mask'
   3277     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
   3278       std::swap(LHS, RHS);
   3279     }
   3280 
   3281     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
   3282     if (!Mask) {
   3283       // We need a constant mask on the RHS of the AND
   3284       return NULL;
   3285     }
   3286 
   3287     // Extract the mask bits
   3288     uint64_t MaskVal = Mask->getZExtValue();
   3289     if (!isMask_64(MaskVal)) {
   3290       // We *could* handle shifted masks here, but doing so would require an
   3291       // 'and' operation to fix up the low-order bits so we would trade
   3292       // shr+and for bfe+and, which has the same throughput
   3293       return NULL;
   3294     }
   3295 
   3296     // How many bits are in our mask?
   3297     uint64_t NumBits = CountTrailingOnes_64(MaskVal);
   3298     Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
   3299 
   3300     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
   3301       // We have a 'srl/and' pair, extract the effective start bit and length
   3302       Val = LHS.getNode()->getOperand(0);
   3303       Start = LHS.getNode()->getOperand(1);
   3304       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
   3305       if (StartConst) {
   3306         uint64_t StartVal = StartConst->getZExtValue();
   3307         // How many "good" bits do we have left?  "good" is defined here as bits
   3308         // that exist in the original value, not shifted in.
   3309         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
   3310         if (NumBits > GoodBits) {
   3311           // Do not handle the case where bits have been shifted in. In theory
   3312           // we could handle this, but the cost is likely higher than just
   3313           // emitting the srl/and pair.
   3314           return NULL;
   3315         }
   3316         Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
   3317       } else {
   3318         // Do not handle the case where the shift amount (can be zero if no srl
   3319         // was found) is not constant. We could handle this case, but it would
   3320         // require run-time logic that would be more expensive than just
   3321         // emitting the srl/and pair.
   3322         return NULL;
   3323       }
   3324     } else {
   3325       // Do not handle the case where the LHS of the and is not a shift. While
   3326       // it would be trivial to handle this case, it would just transform
   3327       // 'and' -> 'bfe', but 'and' has higher-throughput.
   3328       return NULL;
   3329     }
   3330   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
   3331     if (LHS->getOpcode() == ISD::AND) {
   3332       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
   3333       if (!ShiftCnst) {
   3334         // Shift amount must be constant
   3335         return NULL;
   3336       }
   3337 
   3338       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
   3339 
   3340       SDValue AndLHS = LHS->getOperand(0);
   3341       SDValue AndRHS = LHS->getOperand(1);
   3342 
   3343       // Canonicalize the AND to have the mask on the RHS
   3344       if (isa<ConstantSDNode>(AndLHS)) {
   3345         std::swap(AndLHS, AndRHS);
   3346       }
   3347 
   3348       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
   3349       if (!MaskCnst) {
   3350         // Mask must be constant
   3351         return NULL;
   3352       }
   3353 
   3354       uint64_t MaskVal = MaskCnst->getZExtValue();
   3355       uint64_t NumZeros;
   3356       uint64_t NumBits;
   3357       if (isMask_64(MaskVal)) {
   3358         NumZeros = 0;
   3359         // The number of bits in the result bitfield will be the number of
   3360         // trailing ones (the AND) minus the number of bits we shift off
   3361         NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
   3362       } else if (isShiftedMask_64(MaskVal)) {
   3363         NumZeros = countTrailingZeros(MaskVal);
   3364         unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
   3365         // The number of bits in the result bitfield will be the number of
   3366         // trailing zeros plus the number of set bits in the mask minus the
   3367         // number of bits we shift off
   3368         NumBits = NumZeros + NumOnes - ShiftAmt;
   3369       } else {
   3370         // This is not a mask we can handle
   3371         return NULL;
   3372       }
   3373 
   3374       if (ShiftAmt < NumZeros) {
   3375         // Handling this case would require extra logic that would make this
   3376         // transformation non-profitable
   3377         return NULL;
   3378       }
   3379 
   3380       Val = AndLHS;
   3381       Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
   3382       Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
   3383     } else if (LHS->getOpcode() == ISD::SHL) {
   3384       // Here, we have a pattern like:
   3385       //
   3386       // (sra (shl val, NN), MM)
   3387       // or
   3388       // (srl (shl val, NN), MM)
   3389       //
   3390       // If MM >= NN, we can efficiently optimize this with bfe
   3391       Val = LHS->getOperand(0);
   3392 
   3393       SDValue ShlRHS = LHS->getOperand(1);
   3394       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
   3395       if (!ShlCnst) {
   3396         // Shift amount must be constant
   3397         return NULL;
   3398       }
   3399       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
   3400 
   3401       SDValue ShrRHS = RHS;
   3402       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
   3403       if (!ShrCnst) {
   3404         // Shift amount must be constant
   3405         return NULL;
   3406       }
   3407       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
   3408 
   3409       // To avoid extra codegen and be profitable, we need Outer >= Inner
   3410       if (OuterShiftAmt < InnerShiftAmt) {
   3411         return NULL;
   3412       }
   3413 
   3414       // If the outer shift is more than the type size, we have no bitfield to
   3415       // extract (since we also check that the inner shift is <= the outer shift
   3416       // then this also implies that the inner shift is < the type size)
   3417       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
   3418         return NULL;
   3419       }
   3420 
   3421       Start =
   3422         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
   3423       Len =
   3424         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
   3425                                   OuterShiftAmt, MVT::i32);
   3426 
   3427       if (N->getOpcode() == ISD::SRA) {
   3428         // If we have a arithmetic right shift, we need to use the signed bfe
   3429         // variant
   3430         IsSigned = true;
   3431       }
   3432     } else {
   3433       // No can do...
   3434       return NULL;
   3435     }
   3436   } else {
   3437     // No can do...
   3438     return NULL;
   3439   }
   3440 
   3441 
   3442   unsigned Opc;
   3443   // For the BFE operations we form here from "and" and "srl", always use the
   3444   // unsigned variants.
   3445   if (Val.getValueType() == MVT::i32) {
   3446     if (IsSigned) {
   3447       Opc = NVPTX::BFE_S32rii;
   3448     } else {
   3449       Opc = NVPTX::BFE_U32rii;
   3450     }
   3451   } else if (Val.getValueType() == MVT::i64) {
   3452     if (IsSigned) {
   3453       Opc = NVPTX::BFE_S64rii;
   3454     } else {
   3455       Opc = NVPTX::BFE_U64rii;
   3456     }
   3457   } else {
   3458     // We cannot handle this type
   3459     return NULL;
   3460   }
   3461 
   3462   SDValue Ops[] = {
   3463     Val, Start, Len
   3464   };
   3465 
   3466   SDNode *Ret =
   3467     CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   3468 
   3469   return Ret;
   3470 }
   3471 
   3472 // SelectDirectAddr - Match a direct address for DAG.
   3473 // A direct address could be a globaladdress or externalsymbol.
   3474 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   3475   // Return true if TGA or ES.
   3476   if (N.getOpcode() == ISD::TargetGlobalAddress ||
   3477       N.getOpcode() == ISD::TargetExternalSymbol) {
   3478     Address = N;
   3479     return true;
   3480   }
   3481   if (N.getOpcode() == NVPTXISD::Wrapper) {
   3482     Address = N.getOperand(0);
   3483     return true;
   3484   }
   3485   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
   3486     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
   3487     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
   3488       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
   3489         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
   3490   }
   3491   return false;
   3492 }
   3493 
   3494 // symbol+offset
   3495 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
   3496     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   3497   if (Addr.getOpcode() == ISD::ADD) {
   3498     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   3499       SDValue base = Addr.getOperand(0);
   3500       if (SelectDirectAddr(base, Base)) {
   3501         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   3502         return true;
   3503       }
   3504     }
   3505   }
   3506   return false;
   3507 }
   3508 
   3509 // symbol+offset
   3510 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
   3511                                      SDValue &Base, SDValue &Offset) {
   3512   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
   3513 }
   3514 
   3515 // symbol+offset
   3516 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
   3517                                        SDValue &Base, SDValue &Offset) {
   3518   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
   3519 }
   3520 
   3521 // register+offset
   3522 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
   3523     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   3524   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
   3525     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   3526     Offset = CurDAG->getTargetConstant(0, mvt);
   3527     return true;
   3528   }
   3529   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
   3530       Addr.getOpcode() == ISD::TargetGlobalAddress)
   3531     return false; // direct calls.
   3532 
   3533   if (Addr.getOpcode() == ISD::ADD) {
   3534     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
   3535       return false;
   3536     }
   3537     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   3538       if (FrameIndexSDNode *FIN =
   3539               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
   3540         // Constant offset from frame ref.
   3541         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   3542       else
   3543         Base = Addr.getOperand(0);
   3544       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   3545       return true;
   3546     }
   3547   }
   3548   return false;
   3549 }
   3550 
   3551 // register+offset
   3552 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
   3553                                      SDValue &Base, SDValue &Offset) {
   3554   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
   3555 }
   3556 
   3557 // register+offset
   3558 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   3559                                        SDValue &Base, SDValue &Offset) {
   3560   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
   3561 }
   3562 
   3563 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   3564                                                  unsigned int spN) const {
   3565   const Value *Src = nullptr;
   3566   // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
   3567   // the classof() for MemSDNode does not include MemIntrinsicSDNode
   3568   // (See SelectionDAGNodes.h). So we need to check for both.
   3569   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
   3570     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
   3571       return true;
   3572     Src = mN->getMemOperand()->getValue();
   3573   } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
   3574     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
   3575       return true;
   3576     Src = mN->getMemOperand()->getValue();
   3577   }
   3578   if (!Src)
   3579     return false;
   3580   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
   3581     return (PT->getAddressSpace() == spN);
   3582   return false;
   3583 }
   3584 
   3585 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   3586 /// inline asm expressions.
   3587 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   3588     const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
   3589   SDValue Op0, Op1;
   3590   switch (ConstraintCode) {
   3591   default:
   3592     return true;
   3593   case 'm': // memory
   3594     if (SelectDirectAddr(Op, Op0)) {
   3595       OutOps.push_back(Op0);
   3596       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
   3597       return false;
   3598     }
   3599     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
   3600       OutOps.push_back(Op0);
   3601       OutOps.push_back(Op1);
   3602       return false;
   3603     }
   3604     break;
   3605   }
   3606   return true;
   3607 }
   3608