Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "NVPTXISelDAGToDAG.h"
     15 #include "llvm/IR/GlobalValue.h"
     16 #include "llvm/IR/Instructions.h"
     17 #include "llvm/Support/CommandLine.h"
     18 #include "llvm/Support/Debug.h"
     19 #include "llvm/Support/ErrorHandling.h"
     20 #include "llvm/Support/raw_ostream.h"
     21 #include "llvm/Target/TargetIntrinsicInfo.h"
     22 
     23 using namespace llvm;
     24 
     25 #define DEBUG_TYPE "nvptx-isel"
     26 
     27 static cl::opt<int> UsePrecDivF32(
     28     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
     29     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     30              " IEEE Compliant F32 div.rnd if available."),
     31     cl::init(2));
     32 
     33 static cl::opt<bool>
     34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
     35           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
     36           cl::init(true));
     37 
     38 static cl::opt<bool>
     39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
     40            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
     41            cl::init(false));
     42 
     43 
     44 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     45 /// NVPTX-specific DAG, ready for instruction scheduling.
     46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     47                                        llvm::CodeGenOpt::Level OptLevel) {
     48   return new NVPTXDAGToDAGISel(TM, OptLevel);
     49 }
     50 
     51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     52                                      CodeGenOpt::Level OptLevel)
     53     : SelectionDAGISel(tm, OptLevel), TM(tm) {
     54   doMulWide = (OptLevel > 0);
     55 }
     56 
     57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
     58     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
     59     return SelectionDAGISel::runOnMachineFunction(MF);
     60 }
     61 
     62 int NVPTXDAGToDAGISel::getDivF32Level() const {
     63   if (UsePrecDivF32.getNumOccurrences() > 0) {
     64     // If nvptx-prec-div32=N is used on the command-line, always honor it
     65     return UsePrecDivF32;
     66   } else {
     67     // Otherwise, use div.approx if fast math is enabled
     68     if (TM.Options.UnsafeFPMath)
     69       return 0;
     70     else
     71       return 2;
     72   }
     73 }
     74 
     75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
     76   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
     77     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
     78     return UsePrecSqrtF32;
     79   } else {
     80     // Otherwise, use sqrt.approx if fast math is enabled
     81     return !TM.Options.UnsafeFPMath;
     82   }
     83 }
     84 
     85 bool NVPTXDAGToDAGISel::useF32FTZ() const {
     86   if (FtzEnabled.getNumOccurrences() > 0) {
     87     // If nvptx-f32ftz is used on the command-line, always honor it
     88     return FtzEnabled;
     89   } else {
     90     const Function *F = MF->getFunction();
     91     // Otherwise, check for an nvptx-f32ftz attribute on the function
     92     if (F->hasFnAttribute("nvptx-f32ftz"))
     93       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
     94     else
     95       return false;
     96   }
     97 }
     98 
     99 bool NVPTXDAGToDAGISel::allowFMA() const {
    100   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
    101   return TL->allowFMA(*MF, OptLevel);
    102 }
    103 
/// Select - Entry point for selecting a single DAG node.
///
/// Nodes that already carry a machine opcode are marked with node id -1 and
/// skipped (nullptr is returned). Otherwise the node's opcode is dispatched to
/// one of the hand-written Select* helpers below. When no helper applies, or
/// the helper returns null, selection falls back to SelectCode(N), which is
/// used for expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  // Result of the custom selector, if any; stays null when the opcode has no
  // custom handling or the handler declined the node.
  SDNode *ResNode = nullptr;
  switch (N->getOpcode()) {
  // Scalar loads and stores.
  case ISD::LOAD:
    ResNode = SelectLoad(N);
    break;
  case ISD::STORE:
    ResNode = SelectStore(N);
    break;
  // Vector loads.
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
    break;
  // LDG / LDU vector nodes.
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDU(N);
    break;
  // Vector stores.
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
    break;
  // Parameter loads.
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    ResNode = SelectLoadParam(N);
    break;
  // Return-value stores.
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    ResNode = SelectStoreRetval(N);
    break;
  // Parameter stores.
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    ResNode = SelectStoreParam(N);
    break;
  // Intrinsics, without and with a chain operand.
  case ISD::INTRINSIC_WO_CHAIN:
    ResNode = SelectIntrinsicNoChain(N);
    break;
  case ISD::INTRINSIC_W_CHAIN:
    ResNode = SelectIntrinsicChain(N);
    break;
  // Texture nodes (Tex*, Tld4*, and their *Unified* counterparts) all go to
  // SelectTextureIntrinsic.
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    ResNode = SelectTextureIntrinsic(N);
    break;
  // Surface-load nodes (Suld*, in Clamp, Trap and Zero flavors) all go to
  // SelectSurfaceIntrinsic.
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    ResNode = SelectSurfaceIntrinsic(N);
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    ResNode = SelectBFE(N);
    break;
  case ISD::ADDRSPACECAST:
    ResNode = SelectAddrSpaceCast(N);
    break;
  default:
    // No custom handling for this opcode.
    break;
  }
  // Prefer the custom result; otherwise fall back to SelectCode.
  if (ResNode)
    return ResNode;
  return SelectCode(N);
}
    511 
    512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
    513   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    514   switch (IID) {
    515   default:
    516     return NULL;
    517   case Intrinsic::nvvm_ldg_global_f:
    518   case Intrinsic::nvvm_ldg_global_i:
    519   case Intrinsic::nvvm_ldg_global_p:
    520   case Intrinsic::nvvm_ldu_global_f:
    521   case Intrinsic::nvvm_ldu_global_i:
    522   case Intrinsic::nvvm_ldu_global_p:
    523     return SelectLDGLDU(N);
    524   }
    525 }
    526 
    527 static unsigned int getCodeAddrSpace(MemSDNode *N) {
    528   const Value *Src = N->getMemOperand()->getValue();
    529 
    530   if (!Src)
    531     return NVPTX::PTXLdStInstCode::GENERIC;
    532 
    533   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    534     switch (PT->getAddressSpace()) {
    535     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    536     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    537     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    538     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    539     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    540     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    541     default: break;
    542     }
    543   }
    544   return NVPTX::PTXLdStInstCode::GENERIC;
    545 }
    546 
    547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
    548   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    549   switch (IID) {
    550   default:
    551     return nullptr;
    552   case Intrinsic::nvvm_texsurf_handle_internal:
    553     return SelectTexSurfHandle(N);
    554   }
    555 }
    556 
    557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
    558   // Op 0 is the intrinsic ID
    559   SDValue Wrapper = N->getOperand(1);
    560   SDValue GlobalVal = Wrapper.getOperand(0);
    561   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
    562                                 GlobalVal);
    563 }
    564 
    565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
    566   SDValue Src = N->getOperand(0);
    567   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
    568   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
    569   unsigned DstAddrSpace = CastN->getDestAddressSpace();
    570 
    571   assert(SrcAddrSpace != DstAddrSpace &&
    572          "addrspacecast must be between different address spaces");
    573 
    574   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    575     // Specific to generic
    576     unsigned Opc;
    577     switch (SrcAddrSpace) {
    578     default: report_fatal_error("Bad address space in addrspacecast");
    579     case ADDRESS_SPACE_GLOBAL:
    580       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
    581       break;
    582     case ADDRESS_SPACE_SHARED:
    583       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
    584       break;
    585     case ADDRESS_SPACE_CONST:
    586       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
    587       break;
    588     case ADDRESS_SPACE_LOCAL:
    589       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
    590       break;
    591     }
    592     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    593   } else {
    594     // Generic to specific
    595     if (SrcAddrSpace != 0)
    596       report_fatal_error("Cannot cast between two non-generic address spaces");
    597     unsigned Opc;
    598     switch (DstAddrSpace) {
    599     default: report_fatal_error("Bad address space in addrspacecast");
    600     case ADDRESS_SPACE_GLOBAL:
    601       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
    602                          : NVPTX::cvta_to_global_yes;
    603       break;
    604     case ADDRESS_SPACE_SHARED:
    605       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
    606                          : NVPTX::cvta_to_shared_yes;
    607       break;
    608     case ADDRESS_SPACE_CONST:
    609       Opc =
    610           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
    611       break;
    612     case ADDRESS_SPACE_LOCAL:
    613       Opc =
    614           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
    615       break;
    616     }
    617     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    618   }
    619 }
    620 
    621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    622   SDLoc dl(N);
    623   LoadSDNode *LD = cast<LoadSDNode>(N);
    624   EVT LoadedVT = LD->getMemoryVT();
    625   SDNode *NVPTXLD = nullptr;
    626 
    627   // do not support pre/post inc/dec
    628   if (LD->isIndexed())
    629     return nullptr;
    630 
    631   if (!LoadedVT.isSimple())
    632     return nullptr;
    633 
    634   // Address Space Setting
    635   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
    636 
    637   // Volatile Setting
    638   // - .volatile is only availalble for .global and .shared
    639   bool isVolatile = LD->isVolatile();
    640   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    641       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    642       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    643     isVolatile = false;
    644 
    645   // Vector Setting
    646   MVT SimpleVT = LoadedVT.getSimpleVT();
    647   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    648   if (SimpleVT.isVector()) {
    649     unsigned num = SimpleVT.getVectorNumElements();
    650     if (num == 2)
    651       vecType = NVPTX::PTXLdStInstCode::V2;
    652     else if (num == 4)
    653       vecType = NVPTX::PTXLdStInstCode::V4;
    654     else
    655       return nullptr;
    656   }
    657 
    658   // Type Setting: fromType + fromTypeWidth
    659   //
    660   // Sign   : ISD::SEXTLOAD
    661   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    662   //          type is integer
    663   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    664   MVT ScalarVT = SimpleVT.getScalarType();
    665   // Read at least 8 bits (predicates are stored as 8-bit values)
    666   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    667   unsigned int fromType;
    668   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    669     fromType = NVPTX::PTXLdStInstCode::Signed;
    670   else if (ScalarVT.isFloatingPoint())
    671     fromType = NVPTX::PTXLdStInstCode::Float;
    672   else
    673     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    674 
    675   // Create the machine instruction DAG
    676   SDValue Chain = N->getOperand(0);
    677   SDValue N1 = N->getOperand(1);
    678   SDValue Addr;
    679   SDValue Offset, Base;
    680   unsigned Opcode;
    681   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
    682 
    683   if (SelectDirectAddr(N1, Addr)) {
    684     switch (TargetVT) {
    685     case MVT::i8:
    686       Opcode = NVPTX::LD_i8_avar;
    687       break;
    688     case MVT::i16:
    689       Opcode = NVPTX::LD_i16_avar;
    690       break;
    691     case MVT::i32:
    692       Opcode = NVPTX::LD_i32_avar;
    693       break;
    694     case MVT::i64:
    695       Opcode = NVPTX::LD_i64_avar;
    696       break;
    697     case MVT::f32:
    698       Opcode = NVPTX::LD_f32_avar;
    699       break;
    700     case MVT::f64:
    701       Opcode = NVPTX::LD_f64_avar;
    702       break;
    703     default:
    704       return nullptr;
    705     }
    706     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    707                       getI32Imm(vecType), getI32Imm(fromType),
    708                       getI32Imm(fromTypeWidth), Addr, Chain };
    709     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    710   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
    711                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    712     switch (TargetVT) {
    713     case MVT::i8:
    714       Opcode = NVPTX::LD_i8_asi;
    715       break;
    716     case MVT::i16:
    717       Opcode = NVPTX::LD_i16_asi;
    718       break;
    719     case MVT::i32:
    720       Opcode = NVPTX::LD_i32_asi;
    721       break;
    722     case MVT::i64:
    723       Opcode = NVPTX::LD_i64_asi;
    724       break;
    725     case MVT::f32:
    726       Opcode = NVPTX::LD_f32_asi;
    727       break;
    728     case MVT::f64:
    729       Opcode = NVPTX::LD_f64_asi;
    730       break;
    731     default:
    732       return nullptr;
    733     }
    734     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    735                       getI32Imm(vecType), getI32Imm(fromType),
    736                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
    737     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    738   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
    739                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    740     if (TM.is64Bit()) {
    741       switch (TargetVT) {
    742       case MVT::i8:
    743         Opcode = NVPTX::LD_i8_ari_64;
    744         break;
    745       case MVT::i16:
    746         Opcode = NVPTX::LD_i16_ari_64;
    747         break;
    748       case MVT::i32:
    749         Opcode = NVPTX::LD_i32_ari_64;
    750         break;
    751       case MVT::i64:
    752         Opcode = NVPTX::LD_i64_ari_64;
    753         break;
    754       case MVT::f32:
    755         Opcode = NVPTX::LD_f32_ari_64;
    756         break;
    757       case MVT::f64:
    758         Opcode = NVPTX::LD_f64_ari_64;
    759         break;
    760       default:
    761         return nullptr;
    762       }
    763     } else {
    764       switch (TargetVT) {
    765       case MVT::i8:
    766         Opcode = NVPTX::LD_i8_ari;
    767         break;
    768       case MVT::i16:
    769         Opcode = NVPTX::LD_i16_ari;
    770         break;
    771       case MVT::i32:
    772         Opcode = NVPTX::LD_i32_ari;
    773         break;
    774       case MVT::i64:
    775         Opcode = NVPTX::LD_i64_ari;
    776         break;
    777       case MVT::f32:
    778         Opcode = NVPTX::LD_f32_ari;
    779         break;
    780       case MVT::f64:
    781         Opcode = NVPTX::LD_f64_ari;
    782         break;
    783       default:
    784         return nullptr;
    785       }
    786     }
    787     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    788                       getI32Imm(vecType), getI32Imm(fromType),
    789                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
    790     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    791   } else {
    792     if (TM.is64Bit()) {
    793       switch (TargetVT) {
    794       case MVT::i8:
    795         Opcode = NVPTX::LD_i8_areg_64;
    796         break;
    797       case MVT::i16:
    798         Opcode = NVPTX::LD_i16_areg_64;
    799         break;
    800       case MVT::i32:
    801         Opcode = NVPTX::LD_i32_areg_64;
    802         break;
    803       case MVT::i64:
    804         Opcode = NVPTX::LD_i64_areg_64;
    805         break;
    806       case MVT::f32:
    807         Opcode = NVPTX::LD_f32_areg_64;
    808         break;
    809       case MVT::f64:
    810         Opcode = NVPTX::LD_f64_areg_64;
    811         break;
    812       default:
    813         return nullptr;
    814       }
    815     } else {
    816       switch (TargetVT) {
    817       case MVT::i8:
    818         Opcode = NVPTX::LD_i8_areg;
    819         break;
    820       case MVT::i16:
    821         Opcode = NVPTX::LD_i16_areg;
    822         break;
    823       case MVT::i32:
    824         Opcode = NVPTX::LD_i32_areg;
    825         break;
    826       case MVT::i64:
    827         Opcode = NVPTX::LD_i64_areg;
    828         break;
    829       case MVT::f32:
    830         Opcode = NVPTX::LD_f32_areg;
    831         break;
    832       case MVT::f64:
    833         Opcode = NVPTX::LD_f64_areg;
    834         break;
    835       default:
    836         return nullptr;
    837       }
    838     }
    839     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
    840                       getI32Imm(vecType), getI32Imm(fromType),
    841                       getI32Imm(fromTypeWidth), N1, Chain };
    842     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    843   }
    844 
    845   if (NVPTXLD) {
    846     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    847     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    848     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    849   }
    850 
    851   return NVPTXLD;
    852 }
    853 
    854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
          // Builds the LDV_* machine node for an NVPTXISD::LoadV2 / NVPTXISD::LoadV4
          // node N and returns it.
          //
          // Operands read from N: operand 0 is the chain, operand 1 is the address,
          // and the last operand is a constant holding the extension type.
          //
          // Returns nullptr (no node is created) when the memory type is not a
          // simple type, when N is neither LoadV2 nor LoadV4, or when the element
          // type has no entry in the opcode table for the chosen addressing mode.
          //
          // Opcode and LD are declared uninitialized: every path that reaches
          // getMachineNode() assigns Opcode first, and each of the four
          // addressing-mode branches assigns LD; all other paths return early.
    855 
    856   SDValue Chain = N->getOperand(0);
    857   SDValue Op1 = N->getOperand(1);
    858   SDValue Addr, Offset, Base;
    859   unsigned Opcode;
    860   SDLoc DL(N);
    861   SDNode *LD;
    862   MemSDNode *MemSD = cast<MemSDNode>(N);
    863   EVT LoadedVT = MemSD->getMemoryVT();
    864 
          // getSimpleVT() is called on LoadedVT below, so bail out on extended types.
    865   if (!LoadedVT.isSimple())
    866     return nullptr;
    867 
    868   // Address Space Setting
    869   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
    870 
    871   // Volatile Setting
    872   // - the flag is kept only for the GLOBAL, SHARED and GENERIC address-space
          //   codes; it is cleared for every other address space
    873   bool IsVolatile = MemSD->isVolatile();
    874   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    875       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    876       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    877     IsVolatile = false;
    878 
    879   // Vector Setting
    880   MVT SimpleVT = LoadedVT.getSimpleVT();
    881 
    882   // Type Setting: fromType + fromTypeWidth
    883   //
    884   // Sign   : ISD::SEXTLOAD
    885   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    886   //          type is integer
    887   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    888   MVT ScalarVT = SimpleVT.getScalarType();
    889   // Read at least 8 bits (predicates are stored as 8-bit values)
    890   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    891   unsigned int FromType;
    892   // The last operand holds the original LoadSDNode::getExtensionType() value
    893   unsigned ExtensionType = cast<ConstantSDNode>(
    894       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
          // SEXTLOAD wins over the float check; every remaining extension kind is
          // classified purely by whether the memory scalar type is floating point.
    895   if (ExtensionType == ISD::SEXTLOAD)
    896     FromType = NVPTX::PTXLdStInstCode::Signed;
    897   else if (ScalarVT.isFloatingPoint())
    898     FromType = NVPTX::PTXLdStInstCode::Float;
    899   else
    900     FromType = NVPTX::PTXLdStInstCode::Unsigned;
    901 
    902   unsigned VecType;
    903 
          // Translate the node opcode into the V2/V4 vector code passed as an
          // immediate operand; any other opcode is not handled by this function.
    904   switch (N->getOpcode()) {
    905   case NVPTXISD::LoadV2:
    906     VecType = NVPTX::PTXLdStInstCode::V2;
    907     break;
    908   case NVPTXISD::LoadV4:
    909     VecType = NVPTX::PTXLdStInstCode::V4;
    910     break;
    911   default:
    912     return nullptr;
    913   }
    914 
          // The opcode tables below are keyed on the type of N's first result value,
          // whereas FromTypeWidth above comes from the memory type's scalar type.
    915   EVT EltVT = N->getValueType(0);
    916 
          // Addressing-mode dispatch. The matchers are tried in this order and the
          // first one that succeeds decides the opcode suffix:
          //   SelectDirectAddr        -> *_avar            (operand: Addr)
          //   SelectADDRsi / si64     -> *_asi             (operands: Base, Offset)
          //   SelectADDRri / ri64     -> *_ari / *_ari_64  (operands: Base, Offset)
          //   otherwise               -> *_areg / *_areg_64 (operand: Op1 as-is)
          // Every branch passes the same five i32 immediates first (volatile flag,
          // address space, vector code, from-type, from-type width), then the
          // address operand(s), then the chain.
    917   if (SelectDirectAddr(Op1, Addr)) {
    918     switch (N->getOpcode()) {
    919     default:
    920       return nullptr;
    921     case NVPTXISD::LoadV2:
    922       switch (EltVT.getSimpleVT().SimpleTy) {
    923       default:
    924         return nullptr;
    925       case MVT::i8:
    926         Opcode = NVPTX::LDV_i8_v2_avar;
    927         break;
    928       case MVT::i16:
    929         Opcode = NVPTX::LDV_i16_v2_avar;
    930         break;
    931       case MVT::i32:
    932         Opcode = NVPTX::LDV_i32_v2_avar;
    933         break;
    934       case MVT::i64:
    935         Opcode = NVPTX::LDV_i64_v2_avar;
    936         break;
    937       case MVT::f32:
    938         Opcode = NVPTX::LDV_f32_v2_avar;
    939         break;
    940       case MVT::f64:
    941         Opcode = NVPTX::LDV_f64_v2_avar;
    942         break;
    943       }
    944       break;
    945     case NVPTXISD::LoadV4:
              // In this and every other LoadV4 table of this function only
              // i8/i16/i32/f32 are listed; i64 and f64 fall into the default case
              // and return nullptr.
              // NOTE(review): presumably because four 64-bit elements are wider
              // than a single PTX vector load can carry -- confirm against the PTX
              // ISA.
    946       switch (EltVT.getSimpleVT().SimpleTy) {
    947       default:
    948         return nullptr;
    949       case MVT::i8:
    950         Opcode = NVPTX::LDV_i8_v4_avar;
    951         break;
    952       case MVT::i16:
    953         Opcode = NVPTX::LDV_i16_v4_avar;
    954         break;
    955       case MVT::i32:
    956         Opcode = NVPTX::LDV_i32_v4_avar;
    957         break;
    958       case MVT::f32:
    959         Opcode = NVPTX::LDV_f32_v4_avar;
    960         break;
    961       }
    962       break;
    963     }
    964 
    965     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
    966                       getI32Imm(VecType), getI32Imm(FromType),
    967                       getI32Imm(FromTypeWidth), Addr, Chain };
    968     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
    969   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
    970                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
            // Only the matcher differs between 32- and 64-bit targets here; the
            // same *_asi opcodes are used for both.
    971     switch (N->getOpcode()) {
    972     default:
    973       return nullptr;
    974     case NVPTXISD::LoadV2:
    975       switch (EltVT.getSimpleVT().SimpleTy) {
    976       default:
    977         return nullptr;
    978       case MVT::i8:
    979         Opcode = NVPTX::LDV_i8_v2_asi;
    980         break;
    981       case MVT::i16:
    982         Opcode = NVPTX::LDV_i16_v2_asi;
    983         break;
    984       case MVT::i32:
    985         Opcode = NVPTX::LDV_i32_v2_asi;
    986         break;
    987       case MVT::i64:
    988         Opcode = NVPTX::LDV_i64_v2_asi;
    989         break;
    990       case MVT::f32:
    991         Opcode = NVPTX::LDV_f32_v2_asi;
    992         break;
    993       case MVT::f64:
    994         Opcode = NVPTX::LDV_f64_v2_asi;
    995         break;
    996       }
    997       break;
    998     case NVPTXISD::LoadV4:
    999       switch (EltVT.getSimpleVT().SimpleTy) {
   1000       default:
   1001         return nullptr;
   1002       case MVT::i8:
   1003         Opcode = NVPTX::LDV_i8_v4_asi;
   1004         break;
   1005       case MVT::i16:
   1006         Opcode = NVPTX::LDV_i16_v4_asi;
   1007         break;
   1008       case MVT::i32:
   1009         Opcode = NVPTX::LDV_i32_v4_asi;
   1010         break;
   1011       case MVT::f32:
   1012         Opcode = NVPTX::LDV_f32_v4_asi;
   1013         break;
   1014       }
   1015       break;
   1016     }
   1017 
   1018     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
   1019                       getI32Imm(VecType), getI32Imm(FromType),
   1020                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
   1021     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1022   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1023                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
            // Unlike the *_asi case, this mode has separate opcode tables for
            // 64-bit (*_ari_64) and 32-bit (*_ari) targets.
   1024     if (TM.is64Bit()) {
   1025       switch (N->getOpcode()) {
   1026       default:
   1027         return nullptr;
   1028       case NVPTXISD::LoadV2:
   1029         switch (EltVT.getSimpleVT().SimpleTy) {
   1030         default:
   1031           return nullptr;
   1032         case MVT::i8:
   1033           Opcode = NVPTX::LDV_i8_v2_ari_64;
   1034           break;
   1035         case MVT::i16:
   1036           Opcode = NVPTX::LDV_i16_v2_ari_64;
   1037           break;
   1038         case MVT::i32:
   1039           Opcode = NVPTX::LDV_i32_v2_ari_64;
   1040           break;
   1041         case MVT::i64:
   1042           Opcode = NVPTX::LDV_i64_v2_ari_64;
   1043           break;
   1044         case MVT::f32:
   1045           Opcode = NVPTX::LDV_f32_v2_ari_64;
   1046           break;
   1047         case MVT::f64:
   1048           Opcode = NVPTX::LDV_f64_v2_ari_64;
   1049           break;
   1050         }
   1051         break;
   1052       case NVPTXISD::LoadV4:
   1053         switch (EltVT.getSimpleVT().SimpleTy) {
   1054         default:
   1055           return nullptr;
   1056         case MVT::i8:
   1057           Opcode = NVPTX::LDV_i8_v4_ari_64;
   1058           break;
   1059         case MVT::i16:
   1060           Opcode = NVPTX::LDV_i16_v4_ari_64;
   1061           break;
   1062         case MVT::i32:
   1063           Opcode = NVPTX::LDV_i32_v4_ari_64;
   1064           break;
   1065         case MVT::f32:
   1066           Opcode = NVPTX::LDV_f32_v4_ari_64;
   1067           break;
   1068         }
   1069         break;
   1070       }
   1071     } else {
   1072       switch (N->getOpcode()) {
   1073       default:
   1074         return nullptr;
   1075       case NVPTXISD::LoadV2:
   1076         switch (EltVT.getSimpleVT().SimpleTy) {
   1077         default:
   1078           return nullptr;
   1079         case MVT::i8:
   1080           Opcode = NVPTX::LDV_i8_v2_ari;
   1081           break;
   1082         case MVT::i16:
   1083           Opcode = NVPTX::LDV_i16_v2_ari;
   1084           break;
   1085         case MVT::i32:
   1086           Opcode = NVPTX::LDV_i32_v2_ari;
   1087           break;
   1088         case MVT::i64:
   1089           Opcode = NVPTX::LDV_i64_v2_ari;
   1090           break;
   1091         case MVT::f32:
   1092           Opcode = NVPTX::LDV_f32_v2_ari;
   1093           break;
   1094         case MVT::f64:
   1095           Opcode = NVPTX::LDV_f64_v2_ari;
   1096           break;
   1097         }
   1098         break;
   1099       case NVPTXISD::LoadV4:
   1100         switch (EltVT.getSimpleVT().SimpleTy) {
   1101         default:
   1102           return nullptr;
   1103         case MVT::i8:
   1104           Opcode = NVPTX::LDV_i8_v4_ari;
   1105           break;
   1106         case MVT::i16:
   1107           Opcode = NVPTX::LDV_i16_v4_ari;
   1108           break;
   1109         case MVT::i32:
   1110           Opcode = NVPTX::LDV_i32_v4_ari;
   1111           break;
   1112         case MVT::f32:
   1113           Opcode = NVPTX::LDV_f32_v4_ari;
   1114           break;
   1115         }
   1116         break;
   1117       }
   1118     }
   1119 
   1120     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
   1121                       getI32Imm(VecType), getI32Imm(FromType),
   1122                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
   1123 
   1124     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1125   } else {
            // No matcher accepted the address: use the *_areg / *_areg_64 opcodes
            // and pass Op1 itself as the single address operand.
   1126     if (TM.is64Bit()) {
   1127       switch (N->getOpcode()) {
   1128       default:
   1129         return nullptr;
   1130       case NVPTXISD::LoadV2:
   1131         switch (EltVT.getSimpleVT().SimpleTy) {
   1132         default:
   1133           return nullptr;
   1134         case MVT::i8:
   1135           Opcode = NVPTX::LDV_i8_v2_areg_64;
   1136           break;
   1137         case MVT::i16:
   1138           Opcode = NVPTX::LDV_i16_v2_areg_64;
   1139           break;
   1140         case MVT::i32:
   1141           Opcode = NVPTX::LDV_i32_v2_areg_64;
   1142           break;
   1143         case MVT::i64:
   1144           Opcode = NVPTX::LDV_i64_v2_areg_64;
   1145           break;
   1146         case MVT::f32:
   1147           Opcode = NVPTX::LDV_f32_v2_areg_64;
   1148           break;
   1149         case MVT::f64:
   1150           Opcode = NVPTX::LDV_f64_v2_areg_64;
   1151           break;
   1152         }
   1153         break;
   1154       case NVPTXISD::LoadV4:
   1155         switch (EltVT.getSimpleVT().SimpleTy) {
   1156         default:
   1157           return nullptr;
   1158         case MVT::i8:
   1159           Opcode = NVPTX::LDV_i8_v4_areg_64;
   1160           break;
   1161         case MVT::i16:
   1162           Opcode = NVPTX::LDV_i16_v4_areg_64;
   1163           break;
   1164         case MVT::i32:
   1165           Opcode = NVPTX::LDV_i32_v4_areg_64;
   1166           break;
   1167         case MVT::f32:
   1168           Opcode = NVPTX::LDV_f32_v4_areg_64;
   1169           break;
   1170         }
   1171         break;
   1172       }
   1173     } else {
   1174       switch (N->getOpcode()) {
   1175       default:
   1176         return nullptr;
   1177       case NVPTXISD::LoadV2:
   1178         switch (EltVT.getSimpleVT().SimpleTy) {
   1179         default:
   1180           return nullptr;
   1181         case MVT::i8:
   1182           Opcode = NVPTX::LDV_i8_v2_areg;
   1183           break;
   1184         case MVT::i16:
   1185           Opcode = NVPTX::LDV_i16_v2_areg;
   1186           break;
   1187         case MVT::i32:
   1188           Opcode = NVPTX::LDV_i32_v2_areg;
   1189           break;
   1190         case MVT::i64:
   1191           Opcode = NVPTX::LDV_i64_v2_areg;
   1192           break;
   1193         case MVT::f32:
   1194           Opcode = NVPTX::LDV_f32_v2_areg;
   1195           break;
   1196         case MVT::f64:
   1197           Opcode = NVPTX::LDV_f64_v2_areg;
   1198           break;
   1199         }
   1200         break;
   1201       case NVPTXISD::LoadV4:
   1202         switch (EltVT.getSimpleVT().SimpleTy) {
   1203         default:
   1204           return nullptr;
   1205         case MVT::i8:
   1206           Opcode = NVPTX::LDV_i8_v4_areg;
   1207           break;
   1208         case MVT::i16:
   1209           Opcode = NVPTX::LDV_i16_v4_areg;
   1210           break;
   1211         case MVT::i32:
   1212           Opcode = NVPTX::LDV_i32_v4_areg;
   1213           break;
   1214         case MVT::f32:
   1215           Opcode = NVPTX::LDV_f32_v4_areg;
   1216           break;
   1217         }
   1218         break;
   1219       }
   1220     }
   1221 
   1222     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
   1223                       getI32Imm(VecType), getI32Imm(FromType),
   1224                       getI32Imm(FromTypeWidth), Op1, Chain };
   1225     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1226   }
   1227 
          // Carry N's memory operand over to the new machine node: allocate a
          // one-entry memref array, fill it from N, and attach it to LD. LD is
          // used unchecked here because every branch above either assigned it from
          // getMachineNode() or returned.
   1228   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1229   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1230   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1231 
   1232   return LD;
   1233 }
   1234 
   1235 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
   1236 
   1237   SDValue Chain = N->getOperand(0);
   1238   SDValue Op1;
   1239   MemSDNode *Mem;
   1240   bool IsLDG = true;
   1241 
   1242   // If this is an LDG intrinsic, the address is the third operand. Its its an
   1243   // LDG/LDU SD node (from custom vector handling), then its the second operand
   1244   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
   1245     Op1 = N->getOperand(2);
   1246     Mem = cast<MemIntrinsicSDNode>(N);
   1247     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   1248     switch (IID) {
   1249     default:
   1250       return NULL;
   1251     case Intrinsic::nvvm_ldg_global_f:
   1252     case Intrinsic::nvvm_ldg_global_i:
   1253     case Intrinsic::nvvm_ldg_global_p:
   1254       IsLDG = true;
   1255       break;
   1256     case Intrinsic::nvvm_ldu_global_f:
   1257     case Intrinsic::nvvm_ldu_global_i:
   1258     case Intrinsic::nvvm_ldu_global_p:
   1259       IsLDG = false;
   1260       break;
   1261     }
   1262   } else {
   1263     Op1 = N->getOperand(1);
   1264     Mem = cast<MemSDNode>(N);
   1265   }
   1266 
   1267   unsigned Opcode;
   1268   SDLoc DL(N);
   1269   SDNode *LD;
   1270   SDValue Base, Offset, Addr;
   1271 
   1272   EVT EltVT = Mem->getMemoryVT();
   1273   if (EltVT.isVector()) {
   1274     EltVT = EltVT.getVectorElementType();
   1275   }
   1276 
   1277   if (SelectDirectAddr(Op1, Addr)) {
   1278     switch (N->getOpcode()) {
   1279     default:
   1280       return nullptr;
   1281     case ISD::INTRINSIC_W_CHAIN:
   1282       if (IsLDG) {
   1283         switch (EltVT.getSimpleVT().SimpleTy) {
   1284         default:
   1285           return nullptr;
   1286         case MVT::i8:
   1287           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
   1288           break;
   1289         case MVT::i16:
   1290           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
   1291           break;
   1292         case MVT::i32:
   1293           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
   1294           break;
   1295         case MVT::i64:
   1296           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
   1297           break;
   1298         case MVT::f32:
   1299           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
   1300           break;
   1301         case MVT::f64:
   1302           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
   1303           break;
   1304         }
   1305       } else {
   1306         switch (EltVT.getSimpleVT().SimpleTy) {
   1307         default:
   1308           return nullptr;
   1309         case MVT::i8:
   1310           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
   1311           break;
   1312         case MVT::i16:
   1313           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
   1314           break;
   1315         case MVT::i32:
   1316           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
   1317           break;
   1318         case MVT::i64:
   1319           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
   1320           break;
   1321         case MVT::f32:
   1322           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
   1323           break;
   1324         case MVT::f64:
   1325           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
   1326           break;
   1327         }
   1328       }
   1329       break;
   1330     case NVPTXISD::LDGV2:
   1331       switch (EltVT.getSimpleVT().SimpleTy) {
   1332       default:
   1333         return nullptr;
   1334       case MVT::i8:
   1335         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
   1336         break;
   1337       case MVT::i16:
   1338         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
   1339         break;
   1340       case MVT::i32:
   1341         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
   1342         break;
   1343       case MVT::i64:
   1344         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
   1345         break;
   1346       case MVT::f32:
   1347         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
   1348         break;
   1349       case MVT::f64:
   1350         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
   1351         break;
   1352       }
   1353       break;
   1354     case NVPTXISD::LDUV2:
   1355       switch (EltVT.getSimpleVT().SimpleTy) {
   1356       default:
   1357         return nullptr;
   1358       case MVT::i8:
   1359         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
   1360         break;
   1361       case MVT::i16:
   1362         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
   1363         break;
   1364       case MVT::i32:
   1365         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
   1366         break;
   1367       case MVT::i64:
   1368         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
   1369         break;
   1370       case MVT::f32:
   1371         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
   1372         break;
   1373       case MVT::f64:
   1374         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
   1375         break;
   1376       }
   1377       break;
   1378     case NVPTXISD::LDGV4:
   1379       switch (EltVT.getSimpleVT().SimpleTy) {
   1380       default:
   1381         return nullptr;
   1382       case MVT::i8:
   1383         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
   1384         break;
   1385       case MVT::i16:
   1386         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
   1387         break;
   1388       case MVT::i32:
   1389         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
   1390         break;
   1391       case MVT::f32:
   1392         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
   1393         break;
   1394       }
   1395       break;
   1396     case NVPTXISD::LDUV4:
   1397       switch (EltVT.getSimpleVT().SimpleTy) {
   1398       default:
   1399         return nullptr;
   1400       case MVT::i8:
   1401         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
   1402         break;
   1403       case MVT::i16:
   1404         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
   1405         break;
   1406       case MVT::i32:
   1407         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
   1408         break;
   1409       case MVT::f32:
   1410         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
   1411         break;
   1412       }
   1413       break;
   1414     }
   1415 
   1416     SDValue Ops[] = { Addr, Chain };
   1417     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1418   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1419                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
   1420     if (TM.is64Bit()) {
   1421       switch (N->getOpcode()) {
   1422       default:
   1423         return nullptr;
   1424       case ISD::INTRINSIC_W_CHAIN:
   1425         if (IsLDG) {
   1426           switch (EltVT.getSimpleVT().SimpleTy) {
   1427           default:
   1428             return nullptr;
   1429           case MVT::i8:
   1430             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
   1431             break;
   1432           case MVT::i16:
   1433             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
   1434             break;
   1435           case MVT::i32:
   1436             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
   1437             break;
   1438           case MVT::i64:
   1439             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
   1440             break;
   1441           case MVT::f32:
   1442             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
   1443             break;
   1444           case MVT::f64:
   1445             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
   1446             break;
   1447           }
   1448         } else {
   1449           switch (EltVT.getSimpleVT().SimpleTy) {
   1450           default:
   1451             return nullptr;
   1452           case MVT::i8:
   1453             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
   1454             break;
   1455           case MVT::i16:
   1456             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
   1457             break;
   1458           case MVT::i32:
   1459             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
   1460             break;
   1461           case MVT::i64:
   1462             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
   1463             break;
   1464           case MVT::f32:
   1465             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
   1466             break;
   1467           case MVT::f64:
   1468             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
   1469             break;
   1470           }
   1471         }
   1472         break;
   1473       case NVPTXISD::LDGV2:
   1474         switch (EltVT.getSimpleVT().SimpleTy) {
   1475         default:
   1476           return nullptr;
   1477         case MVT::i8:
   1478           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
   1479           break;
   1480         case MVT::i16:
   1481           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
   1482           break;
   1483         case MVT::i32:
   1484           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
   1485           break;
   1486         case MVT::i64:
   1487           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
   1488           break;
   1489         case MVT::f32:
   1490           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
   1491           break;
   1492         case MVT::f64:
   1493           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
   1494           break;
   1495         }
   1496         break;
   1497       case NVPTXISD::LDUV2:
   1498         switch (EltVT.getSimpleVT().SimpleTy) {
   1499         default:
   1500           return nullptr;
   1501         case MVT::i8:
   1502           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
   1503           break;
   1504         case MVT::i16:
   1505           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
   1506           break;
   1507         case MVT::i32:
   1508           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
   1509           break;
   1510         case MVT::i64:
   1511           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
   1512           break;
   1513         case MVT::f32:
   1514           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
   1515           break;
   1516         case MVT::f64:
   1517           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
   1518           break;
   1519         }
   1520         break;
   1521       case NVPTXISD::LDGV4:
   1522         switch (EltVT.getSimpleVT().SimpleTy) {
   1523         default:
   1524           return nullptr;
   1525         case MVT::i8:
   1526           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
   1527           break;
   1528         case MVT::i16:
   1529           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
   1530           break;
   1531         case MVT::i32:
   1532           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
   1533           break;
   1534         case MVT::f32:
   1535           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
   1536           break;
   1537         }
   1538         break;
   1539       case NVPTXISD::LDUV4:
   1540         switch (EltVT.getSimpleVT().SimpleTy) {
   1541         default:
   1542           return nullptr;
   1543         case MVT::i8:
   1544           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
   1545           break;
   1546         case MVT::i16:
   1547           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
   1548           break;
   1549         case MVT::i32:
   1550           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
   1551           break;
   1552         case MVT::f32:
   1553           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
   1554           break;
   1555         }
   1556         break;
   1557       }
   1558     } else {
   1559       switch (N->getOpcode()) {
   1560       default:
   1561         return nullptr;
   1562       case ISD::INTRINSIC_W_CHAIN:
   1563         if (IsLDG) {
   1564           switch (EltVT.getSimpleVT().SimpleTy) {
   1565           default:
   1566             return nullptr;
   1567           case MVT::i8:
   1568             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
   1569             break;
   1570           case MVT::i16:
   1571             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
   1572             break;
   1573           case MVT::i32:
   1574             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
   1575             break;
   1576           case MVT::i64:
   1577             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
   1578             break;
   1579           case MVT::f32:
   1580             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
   1581             break;
   1582           case MVT::f64:
   1583             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
   1584             break;
   1585           }
   1586         } else {
   1587           switch (EltVT.getSimpleVT().SimpleTy) {
   1588           default:
   1589             return nullptr;
   1590           case MVT::i8:
   1591             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
   1592             break;
   1593           case MVT::i16:
   1594             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
   1595             break;
   1596           case MVT::i32:
   1597             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
   1598             break;
   1599           case MVT::i64:
   1600             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
   1601             break;
   1602           case MVT::f32:
   1603             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
   1604             break;
   1605           case MVT::f64:
   1606             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
   1607             break;
   1608           }
   1609         }
   1610         break;
   1611       case NVPTXISD::LDGV2:
   1612         switch (EltVT.getSimpleVT().SimpleTy) {
   1613         default:
   1614           return nullptr;
   1615         case MVT::i8:
   1616           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
   1617           break;
   1618         case MVT::i16:
   1619           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
   1620           break;
   1621         case MVT::i32:
   1622           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
   1623           break;
   1624         case MVT::i64:
   1625           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
   1626           break;
   1627         case MVT::f32:
   1628           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
   1629           break;
   1630         case MVT::f64:
   1631           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
   1632           break;
   1633         }
   1634         break;
   1635       case NVPTXISD::LDUV2:
   1636         switch (EltVT.getSimpleVT().SimpleTy) {
   1637         default:
   1638           return nullptr;
   1639         case MVT::i8:
   1640           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
   1641           break;
   1642         case MVT::i16:
   1643           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
   1644           break;
   1645         case MVT::i32:
   1646           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
   1647           break;
   1648         case MVT::i64:
   1649           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
   1650           break;
   1651         case MVT::f32:
   1652           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
   1653           break;
   1654         case MVT::f64:
   1655           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
   1656           break;
   1657         }
   1658         break;
   1659       case NVPTXISD::LDGV4:
   1660         switch (EltVT.getSimpleVT().SimpleTy) {
   1661         default:
   1662           return nullptr;
   1663         case MVT::i8:
   1664           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
   1665           break;
   1666         case MVT::i16:
   1667           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
   1668           break;
   1669         case MVT::i32:
   1670           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
   1671           break;
   1672         case MVT::f32:
   1673           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
   1674           break;
   1675         }
   1676         break;
   1677       case NVPTXISD::LDUV4:
   1678         switch (EltVT.getSimpleVT().SimpleTy) {
   1679         default:
   1680           return nullptr;
   1681         case MVT::i8:
   1682           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
   1683           break;
   1684         case MVT::i16:
   1685           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
   1686           break;
   1687         case MVT::i32:
   1688           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
   1689           break;
   1690         case MVT::f32:
   1691           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
   1692           break;
   1693         }
   1694         break;
   1695       }
   1696     }
   1697 
   1698     SDValue Ops[] = { Base, Offset, Chain };
   1699 
   1700     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1701   } else {
   1702     if (TM.is64Bit()) {
   1703       switch (N->getOpcode()) {
   1704       default:
   1705         return nullptr;
   1706       case ISD::INTRINSIC_W_CHAIN:
   1707         if (IsLDG) {
   1708           switch (EltVT.getSimpleVT().SimpleTy) {
   1709           default:
   1710             return nullptr;
   1711           case MVT::i8:
   1712             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
   1713             break;
   1714           case MVT::i16:
   1715             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
   1716             break;
   1717           case MVT::i32:
   1718             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
   1719             break;
   1720           case MVT::i64:
   1721             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
   1722             break;
   1723           case MVT::f32:
   1724             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
   1725             break;
   1726           case MVT::f64:
   1727             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
   1728             break;
   1729           }
   1730         } else {
   1731           switch (EltVT.getSimpleVT().SimpleTy) {
   1732           default:
   1733             return nullptr;
   1734           case MVT::i8:
   1735             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
   1736             break;
   1737           case MVT::i16:
   1738             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
   1739             break;
   1740           case MVT::i32:
   1741             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
   1742             break;
   1743           case MVT::i64:
   1744             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
   1745             break;
   1746           case MVT::f32:
   1747             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
   1748             break;
   1749           case MVT::f64:
   1750             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
   1751             break;
   1752           }
   1753         }
   1754         break;
   1755       case NVPTXISD::LDGV2:
   1756         switch (EltVT.getSimpleVT().SimpleTy) {
   1757         default:
   1758           return nullptr;
   1759         case MVT::i8:
   1760           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
   1761           break;
   1762         case MVT::i16:
   1763           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
   1764           break;
   1765         case MVT::i32:
   1766           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
   1767           break;
   1768         case MVT::i64:
   1769           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
   1770           break;
   1771         case MVT::f32:
   1772           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
   1773           break;
   1774         case MVT::f64:
   1775           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
   1776           break;
   1777         }
   1778         break;
   1779       case NVPTXISD::LDUV2:
   1780         switch (EltVT.getSimpleVT().SimpleTy) {
   1781         default:
   1782           return nullptr;
   1783         case MVT::i8:
   1784           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
   1785           break;
   1786         case MVT::i16:
   1787           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
   1788           break;
   1789         case MVT::i32:
   1790           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
   1791           break;
   1792         case MVT::i64:
   1793           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
   1794           break;
   1795         case MVT::f32:
   1796           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
   1797           break;
   1798         case MVT::f64:
   1799           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
   1800           break;
   1801         }
   1802         break;
   1803       case NVPTXISD::LDGV4:
   1804         switch (EltVT.getSimpleVT().SimpleTy) {
   1805         default:
   1806           return nullptr;
   1807         case MVT::i8:
   1808           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
   1809           break;
   1810         case MVT::i16:
   1811           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
   1812           break;
   1813         case MVT::i32:
   1814           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
   1815           break;
   1816         case MVT::f32:
   1817           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
   1818           break;
   1819         }
   1820         break;
   1821       case NVPTXISD::LDUV4:
   1822         switch (EltVT.getSimpleVT().SimpleTy) {
   1823         default:
   1824           return nullptr;
   1825         case MVT::i8:
   1826           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
   1827           break;
   1828         case MVT::i16:
   1829           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
   1830           break;
   1831         case MVT::i32:
   1832           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
   1833           break;
   1834         case MVT::f32:
   1835           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
   1836           break;
   1837         }
   1838         break;
   1839       }
   1840     } else {
   1841       switch (N->getOpcode()) {
   1842       default:
   1843         return nullptr;
   1844       case ISD::INTRINSIC_W_CHAIN:
   1845         if (IsLDG) {
   1846           switch (EltVT.getSimpleVT().SimpleTy) {
   1847           default:
   1848             return nullptr;
   1849           case MVT::i8:
   1850             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
   1851             break;
   1852           case MVT::i16:
   1853             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
   1854             break;
   1855           case MVT::i32:
   1856             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
   1857             break;
   1858           case MVT::i64:
   1859             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
   1860             break;
   1861           case MVT::f32:
   1862             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
   1863             break;
   1864           case MVT::f64:
   1865             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
   1866             break;
   1867           }
   1868         } else {
   1869           switch (EltVT.getSimpleVT().SimpleTy) {
   1870           default:
   1871             return nullptr;
   1872           case MVT::i8:
   1873             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
   1874             break;
   1875           case MVT::i16:
   1876             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
   1877             break;
   1878           case MVT::i32:
   1879             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
   1880             break;
   1881           case MVT::i64:
   1882             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
   1883             break;
   1884           case MVT::f32:
   1885             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
   1886             break;
   1887           case MVT::f64:
   1888             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
   1889             break;
   1890           }
   1891         }
   1892         break;
   1893       case NVPTXISD::LDGV2:
   1894         switch (EltVT.getSimpleVT().SimpleTy) {
   1895         default:
   1896           return nullptr;
   1897         case MVT::i8:
   1898           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
   1899           break;
   1900         case MVT::i16:
   1901           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
   1902           break;
   1903         case MVT::i32:
   1904           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
   1905           break;
   1906         case MVT::i64:
   1907           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
   1908           break;
   1909         case MVT::f32:
   1910           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
   1911           break;
   1912         case MVT::f64:
   1913           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
   1914           break;
   1915         }
   1916         break;
   1917       case NVPTXISD::LDUV2:
   1918         switch (EltVT.getSimpleVT().SimpleTy) {
   1919         default:
   1920           return nullptr;
   1921         case MVT::i8:
   1922           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
   1923           break;
   1924         case MVT::i16:
   1925           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
   1926           break;
   1927         case MVT::i32:
   1928           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
   1929           break;
   1930         case MVT::i64:
   1931           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
   1932           break;
   1933         case MVT::f32:
   1934           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
   1935           break;
   1936         case MVT::f64:
   1937           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
   1938           break;
   1939         }
   1940         break;
   1941       case NVPTXISD::LDGV4:
   1942         switch (EltVT.getSimpleVT().SimpleTy) {
   1943         default:
   1944           return nullptr;
   1945         case MVT::i8:
   1946           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
   1947           break;
   1948         case MVT::i16:
   1949           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
   1950           break;
   1951         case MVT::i32:
   1952           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
   1953           break;
   1954         case MVT::f32:
   1955           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
   1956           break;
   1957         }
   1958         break;
   1959       case NVPTXISD::LDUV4:
   1960         switch (EltVT.getSimpleVT().SimpleTy) {
   1961         default:
   1962           return nullptr;
   1963         case MVT::i8:
   1964           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
   1965           break;
   1966         case MVT::i16:
   1967           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
   1968           break;
   1969         case MVT::i32:
   1970           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
   1971           break;
   1972         case MVT::f32:
   1973           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
   1974           break;
   1975         }
   1976         break;
   1977       }
   1978     }
   1979 
   1980     SDValue Ops[] = { Op1, Chain };
   1981     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1982   }
   1983 
   1984   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1985   MemRefs0[0] = Mem->getMemOperand();
   1986   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1987 
   1988   return LD;
   1989 }
   1990 
   1991 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   1992   SDLoc dl(N);
   1993   StoreSDNode *ST = cast<StoreSDNode>(N);
   1994   EVT StoreVT = ST->getMemoryVT();
   1995   SDNode *NVPTXST = nullptr;
   1996 
   1997   // do not support pre/post inc/dec
   1998   if (ST->isIndexed())
   1999     return nullptr;
   2000 
   2001   if (!StoreVT.isSimple())
   2002     return nullptr;
   2003 
   2004   // Address Space Setting
   2005   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
   2006 
   2007   // Volatile Setting
   2008   // - .volatile is only availalble for .global and .shared
   2009   bool isVolatile = ST->isVolatile();
   2010   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2011       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2012       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2013     isVolatile = false;
   2014 
   2015   // Vector Setting
   2016   MVT SimpleVT = StoreVT.getSimpleVT();
   2017   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
   2018   if (SimpleVT.isVector()) {
   2019     unsigned num = SimpleVT.getVectorNumElements();
   2020     if (num == 2)
   2021       vecType = NVPTX::PTXLdStInstCode::V2;
   2022     else if (num == 4)
   2023       vecType = NVPTX::PTXLdStInstCode::V4;
   2024     else
   2025       return nullptr;
   2026   }
   2027 
   2028   // Type Setting: toType + toTypeWidth
   2029   // - for integer type, always use 'u'
   2030   //
   2031   MVT ScalarVT = SimpleVT.getScalarType();
   2032   unsigned toTypeWidth = ScalarVT.getSizeInBits();
   2033   unsigned int toType;
   2034   if (ScalarVT.isFloatingPoint())
   2035     toType = NVPTX::PTXLdStInstCode::Float;
   2036   else
   2037     toType = NVPTX::PTXLdStInstCode::Unsigned;
   2038 
   2039   // Create the machine instruction DAG
   2040   SDValue Chain = N->getOperand(0);
   2041   SDValue N1 = N->getOperand(1);
   2042   SDValue N2 = N->getOperand(2);
   2043   SDValue Addr;
   2044   SDValue Offset, Base;
   2045   unsigned Opcode;
   2046   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
   2047 
   2048   if (SelectDirectAddr(N2, Addr)) {
   2049     switch (SourceVT) {
   2050     case MVT::i8:
   2051       Opcode = NVPTX::ST_i8_avar;
   2052       break;
   2053     case MVT::i16:
   2054       Opcode = NVPTX::ST_i16_avar;
   2055       break;
   2056     case MVT::i32:
   2057       Opcode = NVPTX::ST_i32_avar;
   2058       break;
   2059     case MVT::i64:
   2060       Opcode = NVPTX::ST_i64_avar;
   2061       break;
   2062     case MVT::f32:
   2063       Opcode = NVPTX::ST_f32_avar;
   2064       break;
   2065     case MVT::f64:
   2066       Opcode = NVPTX::ST_f64_avar;
   2067       break;
   2068     default:
   2069       return nullptr;
   2070     }
   2071     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   2072                       getI32Imm(vecType), getI32Imm(toType),
   2073                       getI32Imm(toTypeWidth), Addr, Chain };
   2074     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2075   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2076                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2077     switch (SourceVT) {
   2078     case MVT::i8:
   2079       Opcode = NVPTX::ST_i8_asi;
   2080       break;
   2081     case MVT::i16:
   2082       Opcode = NVPTX::ST_i16_asi;
   2083       break;
   2084     case MVT::i32:
   2085       Opcode = NVPTX::ST_i32_asi;
   2086       break;
   2087     case MVT::i64:
   2088       Opcode = NVPTX::ST_i64_asi;
   2089       break;
   2090     case MVT::f32:
   2091       Opcode = NVPTX::ST_f32_asi;
   2092       break;
   2093     case MVT::f64:
   2094       Opcode = NVPTX::ST_f64_asi;
   2095       break;
   2096     default:
   2097       return nullptr;
   2098     }
   2099     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   2100                       getI32Imm(vecType), getI32Imm(toType),
   2101                       getI32Imm(toTypeWidth), Base, Offset, Chain };
   2102     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2103   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2104                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2105     if (TM.is64Bit()) {
   2106       switch (SourceVT) {
   2107       case MVT::i8:
   2108         Opcode = NVPTX::ST_i8_ari_64;
   2109         break;
   2110       case MVT::i16:
   2111         Opcode = NVPTX::ST_i16_ari_64;
   2112         break;
   2113       case MVT::i32:
   2114         Opcode = NVPTX::ST_i32_ari_64;
   2115         break;
   2116       case MVT::i64:
   2117         Opcode = NVPTX::ST_i64_ari_64;
   2118         break;
   2119       case MVT::f32:
   2120         Opcode = NVPTX::ST_f32_ari_64;
   2121         break;
   2122       case MVT::f64:
   2123         Opcode = NVPTX::ST_f64_ari_64;
   2124         break;
   2125       default:
   2126         return nullptr;
   2127       }
   2128     } else {
   2129       switch (SourceVT) {
   2130       case MVT::i8:
   2131         Opcode = NVPTX::ST_i8_ari;
   2132         break;
   2133       case MVT::i16:
   2134         Opcode = NVPTX::ST_i16_ari;
   2135         break;
   2136       case MVT::i32:
   2137         Opcode = NVPTX::ST_i32_ari;
   2138         break;
   2139       case MVT::i64:
   2140         Opcode = NVPTX::ST_i64_ari;
   2141         break;
   2142       case MVT::f32:
   2143         Opcode = NVPTX::ST_f32_ari;
   2144         break;
   2145       case MVT::f64:
   2146         Opcode = NVPTX::ST_f64_ari;
   2147         break;
   2148       default:
   2149         return nullptr;
   2150       }
   2151     }
   2152     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   2153                       getI32Imm(vecType), getI32Imm(toType),
   2154                       getI32Imm(toTypeWidth), Base, Offset, Chain };
   2155     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2156   } else {
   2157     if (TM.is64Bit()) {
   2158       switch (SourceVT) {
   2159       case MVT::i8:
   2160         Opcode = NVPTX::ST_i8_areg_64;
   2161         break;
   2162       case MVT::i16:
   2163         Opcode = NVPTX::ST_i16_areg_64;
   2164         break;
   2165       case MVT::i32:
   2166         Opcode = NVPTX::ST_i32_areg_64;
   2167         break;
   2168       case MVT::i64:
   2169         Opcode = NVPTX::ST_i64_areg_64;
   2170         break;
   2171       case MVT::f32:
   2172         Opcode = NVPTX::ST_f32_areg_64;
   2173         break;
   2174       case MVT::f64:
   2175         Opcode = NVPTX::ST_f64_areg_64;
   2176         break;
   2177       default:
   2178         return nullptr;
   2179       }
   2180     } else {
   2181       switch (SourceVT) {
   2182       case MVT::i8:
   2183         Opcode = NVPTX::ST_i8_areg;
   2184         break;
   2185       case MVT::i16:
   2186         Opcode = NVPTX::ST_i16_areg;
   2187         break;
   2188       case MVT::i32:
   2189         Opcode = NVPTX::ST_i32_areg;
   2190         break;
   2191       case MVT::i64:
   2192         Opcode = NVPTX::ST_i64_areg;
   2193         break;
   2194       case MVT::f32:
   2195         Opcode = NVPTX::ST_f32_areg;
   2196         break;
   2197       case MVT::f64:
   2198         Opcode = NVPTX::ST_f64_areg;
   2199         break;
   2200       default:
   2201         return nullptr;
   2202       }
   2203     }
   2204     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
   2205                       getI32Imm(vecType), getI32Imm(toType),
   2206                       getI32Imm(toTypeWidth), N2, Chain };
   2207     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2208   }
   2209 
   2210   if (NVPTXST) {
   2211     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2212     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2213     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2214   }
   2215 
   2216   return NVPTXST;
   2217 }
   2218 
   2219 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   2220   SDValue Chain = N->getOperand(0);
   2221   SDValue Op1 = N->getOperand(1);
   2222   SDValue Addr, Offset, Base;
   2223   unsigned Opcode;
   2224   SDLoc DL(N);
   2225   SDNode *ST;
   2226   EVT EltVT = Op1.getValueType();
   2227   MemSDNode *MemSD = cast<MemSDNode>(N);
   2228   EVT StoreVT = MemSD->getMemoryVT();
   2229 
   2230   // Address Space Setting
   2231   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
   2232 
   2233   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
   2234     report_fatal_error("Cannot store to pointer that points to constant "
   2235                        "memory space");
   2236   }
   2237 
   2238   // Volatile Setting
   2239   // - .volatile is only availalble for .global and .shared
   2240   bool IsVolatile = MemSD->isVolatile();
   2241   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2242       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2243       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2244     IsVolatile = false;
   2245 
   2246   // Type Setting: toType + toTypeWidth
   2247   // - for integer type, always use 'u'
   2248   assert(StoreVT.isSimple() && "Store value is not simple");
   2249   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
   2250   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   2251   unsigned ToType;
   2252   if (ScalarVT.isFloatingPoint())
   2253     ToType = NVPTX::PTXLdStInstCode::Float;
   2254   else
   2255     ToType = NVPTX::PTXLdStInstCode::Unsigned;
   2256 
   2257   SmallVector<SDValue, 12> StOps;
   2258   SDValue N2;
   2259   unsigned VecType;
   2260 
   2261   switch (N->getOpcode()) {
   2262   case NVPTXISD::StoreV2:
   2263     VecType = NVPTX::PTXLdStInstCode::V2;
   2264     StOps.push_back(N->getOperand(1));
   2265     StOps.push_back(N->getOperand(2));
   2266     N2 = N->getOperand(3);
   2267     break;
   2268   case NVPTXISD::StoreV4:
   2269     VecType = NVPTX::PTXLdStInstCode::V4;
   2270     StOps.push_back(N->getOperand(1));
   2271     StOps.push_back(N->getOperand(2));
   2272     StOps.push_back(N->getOperand(3));
   2273     StOps.push_back(N->getOperand(4));
   2274     N2 = N->getOperand(5);
   2275     break;
   2276   default:
   2277     return nullptr;
   2278   }
   2279 
   2280   StOps.push_back(getI32Imm(IsVolatile));
   2281   StOps.push_back(getI32Imm(CodeAddrSpace));
   2282   StOps.push_back(getI32Imm(VecType));
   2283   StOps.push_back(getI32Imm(ToType));
   2284   StOps.push_back(getI32Imm(ToTypeWidth));
   2285 
   2286   if (SelectDirectAddr(N2, Addr)) {
   2287     switch (N->getOpcode()) {
   2288     default:
   2289       return nullptr;
   2290     case NVPTXISD::StoreV2:
   2291       switch (EltVT.getSimpleVT().SimpleTy) {
   2292       default:
   2293         return nullptr;
   2294       case MVT::i8:
   2295         Opcode = NVPTX::STV_i8_v2_avar;
   2296         break;
   2297       case MVT::i16:
   2298         Opcode = NVPTX::STV_i16_v2_avar;
   2299         break;
   2300       case MVT::i32:
   2301         Opcode = NVPTX::STV_i32_v2_avar;
   2302         break;
   2303       case MVT::i64:
   2304         Opcode = NVPTX::STV_i64_v2_avar;
   2305         break;
   2306       case MVT::f32:
   2307         Opcode = NVPTX::STV_f32_v2_avar;
   2308         break;
   2309       case MVT::f64:
   2310         Opcode = NVPTX::STV_f64_v2_avar;
   2311         break;
   2312       }
   2313       break;
   2314     case NVPTXISD::StoreV4:
   2315       switch (EltVT.getSimpleVT().SimpleTy) {
   2316       default:
   2317         return nullptr;
   2318       case MVT::i8:
   2319         Opcode = NVPTX::STV_i8_v4_avar;
   2320         break;
   2321       case MVT::i16:
   2322         Opcode = NVPTX::STV_i16_v4_avar;
   2323         break;
   2324       case MVT::i32:
   2325         Opcode = NVPTX::STV_i32_v4_avar;
   2326         break;
   2327       case MVT::f32:
   2328         Opcode = NVPTX::STV_f32_v4_avar;
   2329         break;
   2330       }
   2331       break;
   2332     }
   2333     StOps.push_back(Addr);
   2334   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2335                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2336     switch (N->getOpcode()) {
   2337     default:
   2338       return nullptr;
   2339     case NVPTXISD::StoreV2:
   2340       switch (EltVT.getSimpleVT().SimpleTy) {
   2341       default:
   2342         return nullptr;
   2343       case MVT::i8:
   2344         Opcode = NVPTX::STV_i8_v2_asi;
   2345         break;
   2346       case MVT::i16:
   2347         Opcode = NVPTX::STV_i16_v2_asi;
   2348         break;
   2349       case MVT::i32:
   2350         Opcode = NVPTX::STV_i32_v2_asi;
   2351         break;
   2352       case MVT::i64:
   2353         Opcode = NVPTX::STV_i64_v2_asi;
   2354         break;
   2355       case MVT::f32:
   2356         Opcode = NVPTX::STV_f32_v2_asi;
   2357         break;
   2358       case MVT::f64:
   2359         Opcode = NVPTX::STV_f64_v2_asi;
   2360         break;
   2361       }
   2362       break;
   2363     case NVPTXISD::StoreV4:
   2364       switch (EltVT.getSimpleVT().SimpleTy) {
   2365       default:
   2366         return nullptr;
   2367       case MVT::i8:
   2368         Opcode = NVPTX::STV_i8_v4_asi;
   2369         break;
   2370       case MVT::i16:
   2371         Opcode = NVPTX::STV_i16_v4_asi;
   2372         break;
   2373       case MVT::i32:
   2374         Opcode = NVPTX::STV_i32_v4_asi;
   2375         break;
   2376       case MVT::f32:
   2377         Opcode = NVPTX::STV_f32_v4_asi;
   2378         break;
   2379       }
   2380       break;
   2381     }
   2382     StOps.push_back(Base);
   2383     StOps.push_back(Offset);
   2384   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2385                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2386     if (TM.is64Bit()) {
   2387       switch (N->getOpcode()) {
   2388       default:
   2389         return nullptr;
   2390       case NVPTXISD::StoreV2:
   2391         switch (EltVT.getSimpleVT().SimpleTy) {
   2392         default:
   2393           return nullptr;
   2394         case MVT::i8:
   2395           Opcode = NVPTX::STV_i8_v2_ari_64;
   2396           break;
   2397         case MVT::i16:
   2398           Opcode = NVPTX::STV_i16_v2_ari_64;
   2399           break;
   2400         case MVT::i32:
   2401           Opcode = NVPTX::STV_i32_v2_ari_64;
   2402           break;
   2403         case MVT::i64:
   2404           Opcode = NVPTX::STV_i64_v2_ari_64;
   2405           break;
   2406         case MVT::f32:
   2407           Opcode = NVPTX::STV_f32_v2_ari_64;
   2408           break;
   2409         case MVT::f64:
   2410           Opcode = NVPTX::STV_f64_v2_ari_64;
   2411           break;
   2412         }
   2413         break;
   2414       case NVPTXISD::StoreV4:
   2415         switch (EltVT.getSimpleVT().SimpleTy) {
   2416         default:
   2417           return nullptr;
   2418         case MVT::i8:
   2419           Opcode = NVPTX::STV_i8_v4_ari_64;
   2420           break;
   2421         case MVT::i16:
   2422           Opcode = NVPTX::STV_i16_v4_ari_64;
   2423           break;
   2424         case MVT::i32:
   2425           Opcode = NVPTX::STV_i32_v4_ari_64;
   2426           break;
   2427         case MVT::f32:
   2428           Opcode = NVPTX::STV_f32_v4_ari_64;
   2429           break;
   2430         }
   2431         break;
   2432       }
   2433     } else {
   2434       switch (N->getOpcode()) {
   2435       default:
   2436         return nullptr;
   2437       case NVPTXISD::StoreV2:
   2438         switch (EltVT.getSimpleVT().SimpleTy) {
   2439         default:
   2440           return nullptr;
   2441         case MVT::i8:
   2442           Opcode = NVPTX::STV_i8_v2_ari;
   2443           break;
   2444         case MVT::i16:
   2445           Opcode = NVPTX::STV_i16_v2_ari;
   2446           break;
   2447         case MVT::i32:
   2448           Opcode = NVPTX::STV_i32_v2_ari;
   2449           break;
   2450         case MVT::i64:
   2451           Opcode = NVPTX::STV_i64_v2_ari;
   2452           break;
   2453         case MVT::f32:
   2454           Opcode = NVPTX::STV_f32_v2_ari;
   2455           break;
   2456         case MVT::f64:
   2457           Opcode = NVPTX::STV_f64_v2_ari;
   2458           break;
   2459         }
   2460         break;
   2461       case NVPTXISD::StoreV4:
   2462         switch (EltVT.getSimpleVT().SimpleTy) {
   2463         default:
   2464           return nullptr;
   2465         case MVT::i8:
   2466           Opcode = NVPTX::STV_i8_v4_ari;
   2467           break;
   2468         case MVT::i16:
   2469           Opcode = NVPTX::STV_i16_v4_ari;
   2470           break;
   2471         case MVT::i32:
   2472           Opcode = NVPTX::STV_i32_v4_ari;
   2473           break;
   2474         case MVT::f32:
   2475           Opcode = NVPTX::STV_f32_v4_ari;
   2476           break;
   2477         }
   2478         break;
   2479       }
   2480     }
   2481     StOps.push_back(Base);
   2482     StOps.push_back(Offset);
   2483   } else {
   2484     if (TM.is64Bit()) {
   2485       switch (N->getOpcode()) {
   2486       default:
   2487         return nullptr;
   2488       case NVPTXISD::StoreV2:
   2489         switch (EltVT.getSimpleVT().SimpleTy) {
   2490         default:
   2491           return nullptr;
   2492         case MVT::i8:
   2493           Opcode = NVPTX::STV_i8_v2_areg_64;
   2494           break;
   2495         case MVT::i16:
   2496           Opcode = NVPTX::STV_i16_v2_areg_64;
   2497           break;
   2498         case MVT::i32:
   2499           Opcode = NVPTX::STV_i32_v2_areg_64;
   2500           break;
   2501         case MVT::i64:
   2502           Opcode = NVPTX::STV_i64_v2_areg_64;
   2503           break;
   2504         case MVT::f32:
   2505           Opcode = NVPTX::STV_f32_v2_areg_64;
   2506           break;
   2507         case MVT::f64:
   2508           Opcode = NVPTX::STV_f64_v2_areg_64;
   2509           break;
   2510         }
   2511         break;
   2512       case NVPTXISD::StoreV4:
   2513         switch (EltVT.getSimpleVT().SimpleTy) {
   2514         default:
   2515           return nullptr;
   2516         case MVT::i8:
   2517           Opcode = NVPTX::STV_i8_v4_areg_64;
   2518           break;
   2519         case MVT::i16:
   2520           Opcode = NVPTX::STV_i16_v4_areg_64;
   2521           break;
   2522         case MVT::i32:
   2523           Opcode = NVPTX::STV_i32_v4_areg_64;
   2524           break;
   2525         case MVT::f32:
   2526           Opcode = NVPTX::STV_f32_v4_areg_64;
   2527           break;
   2528         }
   2529         break;
   2530       }
   2531     } else {
   2532       switch (N->getOpcode()) {
   2533       default:
   2534         return nullptr;
   2535       case NVPTXISD::StoreV2:
   2536         switch (EltVT.getSimpleVT().SimpleTy) {
   2537         default:
   2538           return nullptr;
   2539         case MVT::i8:
   2540           Opcode = NVPTX::STV_i8_v2_areg;
   2541           break;
   2542         case MVT::i16:
   2543           Opcode = NVPTX::STV_i16_v2_areg;
   2544           break;
   2545         case MVT::i32:
   2546           Opcode = NVPTX::STV_i32_v2_areg;
   2547           break;
   2548         case MVT::i64:
   2549           Opcode = NVPTX::STV_i64_v2_areg;
   2550           break;
   2551         case MVT::f32:
   2552           Opcode = NVPTX::STV_f32_v2_areg;
   2553           break;
   2554         case MVT::f64:
   2555           Opcode = NVPTX::STV_f64_v2_areg;
   2556           break;
   2557         }
   2558         break;
   2559       case NVPTXISD::StoreV4:
   2560         switch (EltVT.getSimpleVT().SimpleTy) {
   2561         default:
   2562           return nullptr;
   2563         case MVT::i8:
   2564           Opcode = NVPTX::STV_i8_v4_areg;
   2565           break;
   2566         case MVT::i16:
   2567           Opcode = NVPTX::STV_i16_v4_areg;
   2568           break;
   2569         case MVT::i32:
   2570           Opcode = NVPTX::STV_i32_v4_areg;
   2571           break;
   2572         case MVT::f32:
   2573           Opcode = NVPTX::STV_f32_v4_areg;
   2574           break;
   2575         }
   2576         break;
   2577       }
   2578     }
   2579     StOps.push_back(N2);
   2580   }
   2581 
   2582   StOps.push_back(Chain);
   2583 
   2584   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
   2585 
   2586   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2587   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2588   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2589 
   2590   return ST;
   2591 }
   2592 
   2593 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
   2594   SDValue Chain = Node->getOperand(0);
   2595   SDValue Offset = Node->getOperand(2);
   2596   SDValue Flag = Node->getOperand(3);
   2597   SDLoc DL(Node);
   2598   MemSDNode *Mem = cast<MemSDNode>(Node);
   2599 
   2600   unsigned VecSize;
   2601   switch (Node->getOpcode()) {
   2602   default:
   2603     return nullptr;
   2604   case NVPTXISD::LoadParam:
   2605     VecSize = 1;
   2606     break;
   2607   case NVPTXISD::LoadParamV2:
   2608     VecSize = 2;
   2609     break;
   2610   case NVPTXISD::LoadParamV4:
   2611     VecSize = 4;
   2612     break;
   2613   }
   2614 
   2615   EVT EltVT = Node->getValueType(0);
   2616   EVT MemVT = Mem->getMemoryVT();
   2617 
   2618   unsigned Opc = 0;
   2619 
   2620   switch (VecSize) {
   2621   default:
   2622     return nullptr;
   2623   case 1:
   2624     switch (MemVT.getSimpleVT().SimpleTy) {
   2625     default:
   2626       return nullptr;
   2627     case MVT::i1:
   2628       Opc = NVPTX::LoadParamMemI8;
   2629       break;
   2630     case MVT::i8:
   2631       Opc = NVPTX::LoadParamMemI8;
   2632       break;
   2633     case MVT::i16:
   2634       Opc = NVPTX::LoadParamMemI16;
   2635       break;
   2636     case MVT::i32:
   2637       Opc = NVPTX::LoadParamMemI32;
   2638       break;
   2639     case MVT::i64:
   2640       Opc = NVPTX::LoadParamMemI64;
   2641       break;
   2642     case MVT::f32:
   2643       Opc = NVPTX::LoadParamMemF32;
   2644       break;
   2645     case MVT::f64:
   2646       Opc = NVPTX::LoadParamMemF64;
   2647       break;
   2648     }
   2649     break;
   2650   case 2:
   2651     switch (MemVT.getSimpleVT().SimpleTy) {
   2652     default:
   2653       return nullptr;
   2654     case MVT::i1:
   2655       Opc = NVPTX::LoadParamMemV2I8;
   2656       break;
   2657     case MVT::i8:
   2658       Opc = NVPTX::LoadParamMemV2I8;
   2659       break;
   2660     case MVT::i16:
   2661       Opc = NVPTX::LoadParamMemV2I16;
   2662       break;
   2663     case MVT::i32:
   2664       Opc = NVPTX::LoadParamMemV2I32;
   2665       break;
   2666     case MVT::i64:
   2667       Opc = NVPTX::LoadParamMemV2I64;
   2668       break;
   2669     case MVT::f32:
   2670       Opc = NVPTX::LoadParamMemV2F32;
   2671       break;
   2672     case MVT::f64:
   2673       Opc = NVPTX::LoadParamMemV2F64;
   2674       break;
   2675     }
   2676     break;
   2677   case 4:
   2678     switch (MemVT.getSimpleVT().SimpleTy) {
   2679     default:
   2680       return nullptr;
   2681     case MVT::i1:
   2682       Opc = NVPTX::LoadParamMemV4I8;
   2683       break;
   2684     case MVT::i8:
   2685       Opc = NVPTX::LoadParamMemV4I8;
   2686       break;
   2687     case MVT::i16:
   2688       Opc = NVPTX::LoadParamMemV4I16;
   2689       break;
   2690     case MVT::i32:
   2691       Opc = NVPTX::LoadParamMemV4I32;
   2692       break;
   2693     case MVT::f32:
   2694       Opc = NVPTX::LoadParamMemV4F32;
   2695       break;
   2696     }
   2697     break;
   2698   }
   2699 
   2700   SDVTList VTs;
   2701   if (VecSize == 1) {
   2702     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
   2703   } else if (VecSize == 2) {
   2704     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
   2705   } else {
   2706     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
   2707     VTs = CurDAG->getVTList(EVTs);
   2708   }
   2709 
   2710   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2711 
   2712   SmallVector<SDValue, 2> Ops;
   2713   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2714   Ops.push_back(Chain);
   2715   Ops.push_back(Flag);
   2716 
   2717   SDNode *Ret =
   2718       CurDAG->getMachineNode(Opc, DL, VTs, Ops);
   2719   return Ret;
   2720 }
   2721 
   2722 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
   2723   SDLoc DL(N);
   2724   SDValue Chain = N->getOperand(0);
   2725   SDValue Offset = N->getOperand(1);
   2726   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2727   MemSDNode *Mem = cast<MemSDNode>(N);
   2728 
   2729   // How many elements do we have?
   2730   unsigned NumElts = 1;
   2731   switch (N->getOpcode()) {
   2732   default:
   2733     return nullptr;
   2734   case NVPTXISD::StoreRetval:
   2735     NumElts = 1;
   2736     break;
   2737   case NVPTXISD::StoreRetvalV2:
   2738     NumElts = 2;
   2739     break;
   2740   case NVPTXISD::StoreRetvalV4:
   2741     NumElts = 4;
   2742     break;
   2743   }
   2744 
   2745   // Build vector of operands
   2746   SmallVector<SDValue, 6> Ops;
   2747   for (unsigned i = 0; i < NumElts; ++i)
   2748     Ops.push_back(N->getOperand(i + 2));
   2749   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2750   Ops.push_back(Chain);
   2751 
   2752   // Determine target opcode
   2753   // If we have an i1, use an 8-bit store. The lowering code in
   2754   // NVPTXISelLowering will have already emitted an upcast.
   2755   unsigned Opcode = 0;
   2756   switch (NumElts) {
   2757   default:
   2758     return nullptr;
   2759   case 1:
   2760     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2761     default:
   2762       return nullptr;
   2763     case MVT::i1:
   2764       Opcode = NVPTX::StoreRetvalI8;
   2765       break;
   2766     case MVT::i8:
   2767       Opcode = NVPTX::StoreRetvalI8;
   2768       break;
   2769     case MVT::i16:
   2770       Opcode = NVPTX::StoreRetvalI16;
   2771       break;
   2772     case MVT::i32:
   2773       Opcode = NVPTX::StoreRetvalI32;
   2774       break;
   2775     case MVT::i64:
   2776       Opcode = NVPTX::StoreRetvalI64;
   2777       break;
   2778     case MVT::f32:
   2779       Opcode = NVPTX::StoreRetvalF32;
   2780       break;
   2781     case MVT::f64:
   2782       Opcode = NVPTX::StoreRetvalF64;
   2783       break;
   2784     }
   2785     break;
   2786   case 2:
   2787     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2788     default:
   2789       return nullptr;
   2790     case MVT::i1:
   2791       Opcode = NVPTX::StoreRetvalV2I8;
   2792       break;
   2793     case MVT::i8:
   2794       Opcode = NVPTX::StoreRetvalV2I8;
   2795       break;
   2796     case MVT::i16:
   2797       Opcode = NVPTX::StoreRetvalV2I16;
   2798       break;
   2799     case MVT::i32:
   2800       Opcode = NVPTX::StoreRetvalV2I32;
   2801       break;
   2802     case MVT::i64:
   2803       Opcode = NVPTX::StoreRetvalV2I64;
   2804       break;
   2805     case MVT::f32:
   2806       Opcode = NVPTX::StoreRetvalV2F32;
   2807       break;
   2808     case MVT::f64:
   2809       Opcode = NVPTX::StoreRetvalV2F64;
   2810       break;
   2811     }
   2812     break;
   2813   case 4:
   2814     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2815     default:
   2816       return nullptr;
   2817     case MVT::i1:
   2818       Opcode = NVPTX::StoreRetvalV4I8;
   2819       break;
   2820     case MVT::i8:
   2821       Opcode = NVPTX::StoreRetvalV4I8;
   2822       break;
   2823     case MVT::i16:
   2824       Opcode = NVPTX::StoreRetvalV4I16;
   2825       break;
   2826     case MVT::i32:
   2827       Opcode = NVPTX::StoreRetvalV4I32;
   2828       break;
   2829     case MVT::f32:
   2830       Opcode = NVPTX::StoreRetvalV4F32;
   2831       break;
   2832     }
   2833     break;
   2834   }
   2835 
   2836   SDNode *Ret =
   2837       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   2838   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2839   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2840   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2841 
   2842   return Ret;
   2843 }
   2844 
   2845 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
   2846   SDLoc DL(N);
   2847   SDValue Chain = N->getOperand(0);
   2848   SDValue Param = N->getOperand(1);
   2849   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
   2850   SDValue Offset = N->getOperand(2);
   2851   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2852   MemSDNode *Mem = cast<MemSDNode>(N);
   2853   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
   2854 
   2855   // How many elements do we have?
   2856   unsigned NumElts = 1;
   2857   switch (N->getOpcode()) {
   2858   default:
   2859     return nullptr;
   2860   case NVPTXISD::StoreParamU32:
   2861   case NVPTXISD::StoreParamS32:
   2862   case NVPTXISD::StoreParam:
   2863     NumElts = 1;
   2864     break;
   2865   case NVPTXISD::StoreParamV2:
   2866     NumElts = 2;
   2867     break;
   2868   case NVPTXISD::StoreParamV4:
   2869     NumElts = 4;
   2870     break;
   2871   }
   2872 
   2873   // Build vector of operands
   2874   SmallVector<SDValue, 8> Ops;
   2875   for (unsigned i = 0; i < NumElts; ++i)
   2876     Ops.push_back(N->getOperand(i + 3));
   2877   Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
   2878   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
   2879   Ops.push_back(Chain);
   2880   Ops.push_back(Flag);
   2881 
   2882   // Determine target opcode
   2883   // If we have an i1, use an 8-bit store. The lowering code in
   2884   // NVPTXISelLowering will have already emitted an upcast.
   2885   unsigned Opcode = 0;
   2886   switch (N->getOpcode()) {
   2887   default:
   2888     switch (NumElts) {
   2889     default:
   2890       return nullptr;
   2891     case 1:
   2892       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2893       default:
   2894         return nullptr;
   2895       case MVT::i1:
   2896         Opcode = NVPTX::StoreParamI8;
   2897         break;
   2898       case MVT::i8:
   2899         Opcode = NVPTX::StoreParamI8;
   2900         break;
   2901       case MVT::i16:
   2902         Opcode = NVPTX::StoreParamI16;
   2903         break;
   2904       case MVT::i32:
   2905         Opcode = NVPTX::StoreParamI32;
   2906         break;
   2907       case MVT::i64:
   2908         Opcode = NVPTX::StoreParamI64;
   2909         break;
   2910       case MVT::f32:
   2911         Opcode = NVPTX::StoreParamF32;
   2912         break;
   2913       case MVT::f64:
   2914         Opcode = NVPTX::StoreParamF64;
   2915         break;
   2916       }
   2917       break;
   2918     case 2:
   2919       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2920       default:
   2921         return nullptr;
   2922       case MVT::i1:
   2923         Opcode = NVPTX::StoreParamV2I8;
   2924         break;
   2925       case MVT::i8:
   2926         Opcode = NVPTX::StoreParamV2I8;
   2927         break;
   2928       case MVT::i16:
   2929         Opcode = NVPTX::StoreParamV2I16;
   2930         break;
   2931       case MVT::i32:
   2932         Opcode = NVPTX::StoreParamV2I32;
   2933         break;
   2934       case MVT::i64:
   2935         Opcode = NVPTX::StoreParamV2I64;
   2936         break;
   2937       case MVT::f32:
   2938         Opcode = NVPTX::StoreParamV2F32;
   2939         break;
   2940       case MVT::f64:
   2941         Opcode = NVPTX::StoreParamV2F64;
   2942         break;
   2943       }
   2944       break;
   2945     case 4:
   2946       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2947       default:
   2948         return nullptr;
   2949       case MVT::i1:
   2950         Opcode = NVPTX::StoreParamV4I8;
   2951         break;
   2952       case MVT::i8:
   2953         Opcode = NVPTX::StoreParamV4I8;
   2954         break;
   2955       case MVT::i16:
   2956         Opcode = NVPTX::StoreParamV4I16;
   2957         break;
   2958       case MVT::i32:
   2959         Opcode = NVPTX::StoreParamV4I32;
   2960         break;
   2961       case MVT::f32:
   2962         Opcode = NVPTX::StoreParamV4F32;
   2963         break;
   2964       }
   2965       break;
   2966     }
   2967     break;
   2968   // Special case: if we have a sign-extend/zero-extend node, insert the
   2969   // conversion instruction first, and use that as the value operand to
   2970   // the selected StoreParam node.
   2971   case NVPTXISD::StoreParamU32: {
   2972     Opcode = NVPTX::StoreParamI32;
   2973     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
   2974                                                 MVT::i32);
   2975     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
   2976                                          MVT::i32, Ops[0], CvtNone);
   2977     Ops[0] = SDValue(Cvt, 0);
   2978     break;
   2979   }
   2980   case NVPTXISD::StoreParamS32: {
   2981     Opcode = NVPTX::StoreParamI32;
   2982     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
   2983                                                 MVT::i32);
   2984     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
   2985                                          MVT::i32, Ops[0], CvtNone);
   2986     Ops[0] = SDValue(Cvt, 0);
   2987     break;
   2988   }
   2989   }
   2990 
   2991   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
   2992   SDNode *Ret =
   2993       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
   2994   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2995   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2996   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2997 
   2998   return Ret;
   2999 }
   3000 
   3001 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
   3002   SDValue Chain = N->getOperand(0);
   3003   SDNode *Ret = nullptr;
   3004   unsigned Opc = 0;
   3005   SmallVector<SDValue, 8> Ops;
   3006 
   3007   switch (N->getOpcode()) {
   3008   default: return nullptr;
   3009   case NVPTXISD::Tex1DFloatS32:
   3010     Opc = NVPTX::TEX_1D_F32_S32;
   3011     break;
   3012   case NVPTXISD::Tex1DFloatFloat:
   3013     Opc = NVPTX::TEX_1D_F32_F32;
   3014     break;
   3015   case NVPTXISD::Tex1DFloatFloatLevel:
   3016     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
   3017     break;
   3018   case NVPTXISD::Tex1DFloatFloatGrad:
   3019     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
   3020     break;
   3021   case NVPTXISD::Tex1DS32S32:
   3022     Opc = NVPTX::TEX_1D_S32_S32;
   3023     break;
   3024   case NVPTXISD::Tex1DS32Float:
   3025     Opc = NVPTX::TEX_1D_S32_F32;
   3026     break;
   3027   case NVPTXISD::Tex1DS32FloatLevel:
   3028     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
   3029     break;
   3030   case NVPTXISD::Tex1DS32FloatGrad:
   3031     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
   3032     break;
   3033   case NVPTXISD::Tex1DU32S32:
   3034     Opc = NVPTX::TEX_1D_U32_S32;
   3035     break;
   3036   case NVPTXISD::Tex1DU32Float:
   3037     Opc = NVPTX::TEX_1D_U32_F32;
   3038     break;
   3039   case NVPTXISD::Tex1DU32FloatLevel:
   3040     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
   3041     break;
   3042   case NVPTXISD::Tex1DU32FloatGrad:
   3043     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
   3044     break;
   3045   case NVPTXISD::Tex1DArrayFloatS32:
   3046     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
   3047     break;
   3048   case NVPTXISD::Tex1DArrayFloatFloat:
   3049     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
   3050     break;
   3051   case NVPTXISD::Tex1DArrayFloatFloatLevel:
   3052     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
   3053     break;
   3054   case NVPTXISD::Tex1DArrayFloatFloatGrad:
   3055     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
   3056     break;
   3057   case NVPTXISD::Tex1DArrayS32S32:
   3058     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
   3059     break;
   3060   case NVPTXISD::Tex1DArrayS32Float:
   3061     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
   3062     break;
   3063   case NVPTXISD::Tex1DArrayS32FloatLevel:
   3064     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
   3065     break;
   3066   case NVPTXISD::Tex1DArrayS32FloatGrad:
   3067     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
   3068     break;
   3069   case NVPTXISD::Tex1DArrayU32S32:
   3070     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
   3071     break;
   3072   case NVPTXISD::Tex1DArrayU32Float:
   3073     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
   3074     break;
   3075   case NVPTXISD::Tex1DArrayU32FloatLevel:
   3076     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
   3077     break;
   3078   case NVPTXISD::Tex1DArrayU32FloatGrad:
   3079     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
   3080     break;
   3081   case NVPTXISD::Tex2DFloatS32:
   3082     Opc = NVPTX::TEX_2D_F32_S32;
   3083     break;
   3084   case NVPTXISD::Tex2DFloatFloat:
   3085     Opc = NVPTX::TEX_2D_F32_F32;
   3086     break;
   3087   case NVPTXISD::Tex2DFloatFloatLevel:
   3088     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
   3089     break;
   3090   case NVPTXISD::Tex2DFloatFloatGrad:
   3091     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
   3092     break;
   3093   case NVPTXISD::Tex2DS32S32:
   3094     Opc = NVPTX::TEX_2D_S32_S32;
   3095     break;
   3096   case NVPTXISD::Tex2DS32Float:
   3097     Opc = NVPTX::TEX_2D_S32_F32;
   3098     break;
   3099   case NVPTXISD::Tex2DS32FloatLevel:
   3100     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
   3101     break;
   3102   case NVPTXISD::Tex2DS32FloatGrad:
   3103     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
   3104     break;
   3105   case NVPTXISD::Tex2DU32S32:
   3106     Opc = NVPTX::TEX_2D_U32_S32;
   3107     break;
   3108   case NVPTXISD::Tex2DU32Float:
   3109     Opc = NVPTX::TEX_2D_U32_F32;
   3110     break;
   3111   case NVPTXISD::Tex2DU32FloatLevel:
   3112     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
   3113     break;
   3114   case NVPTXISD::Tex2DU32FloatGrad:
   3115     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
   3116     break;
   3117   case NVPTXISD::Tex2DArrayFloatS32:
   3118     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
   3119     break;
   3120   case NVPTXISD::Tex2DArrayFloatFloat:
   3121     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
   3122     break;
   3123   case NVPTXISD::Tex2DArrayFloatFloatLevel:
   3124     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
   3125     break;
   3126   case NVPTXISD::Tex2DArrayFloatFloatGrad:
   3127     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
   3128     break;
   3129   case NVPTXISD::Tex2DArrayS32S32:
   3130     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
   3131     break;
   3132   case NVPTXISD::Tex2DArrayS32Float:
   3133     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
   3134     break;
   3135   case NVPTXISD::Tex2DArrayS32FloatLevel:
   3136     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
   3137     break;
   3138   case NVPTXISD::Tex2DArrayS32FloatGrad:
   3139     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
   3140     break;
   3141   case NVPTXISD::Tex2DArrayU32S32:
   3142     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
   3143     break;
   3144   case NVPTXISD::Tex2DArrayU32Float:
   3145     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
   3146     break;
   3147   case NVPTXISD::Tex2DArrayU32FloatLevel:
   3148     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
   3149     break;
   3150   case NVPTXISD::Tex2DArrayU32FloatGrad:
   3151     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
   3152     break;
   3153   case NVPTXISD::Tex3DFloatS32:
   3154     Opc = NVPTX::TEX_3D_F32_S32;
   3155     break;
   3156   case NVPTXISD::Tex3DFloatFloat:
   3157     Opc = NVPTX::TEX_3D_F32_F32;
   3158     break;
   3159   case NVPTXISD::Tex3DFloatFloatLevel:
   3160     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
   3161     break;
   3162   case NVPTXISD::Tex3DFloatFloatGrad:
   3163     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
   3164     break;
   3165   case NVPTXISD::Tex3DS32S32:
   3166     Opc = NVPTX::TEX_3D_S32_S32;
   3167     break;
   3168   case NVPTXISD::Tex3DS32Float:
   3169     Opc = NVPTX::TEX_3D_S32_F32;
   3170     break;
   3171   case NVPTXISD::Tex3DS32FloatLevel:
   3172     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
   3173     break;
   3174   case NVPTXISD::Tex3DS32FloatGrad:
   3175     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
   3176     break;
   3177   case NVPTXISD::Tex3DU32S32:
   3178     Opc = NVPTX::TEX_3D_U32_S32;
   3179     break;
   3180   case NVPTXISD::Tex3DU32Float:
   3181     Opc = NVPTX::TEX_3D_U32_F32;
   3182     break;
   3183   case NVPTXISD::Tex3DU32FloatLevel:
   3184     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
   3185     break;
   3186   case NVPTXISD::Tex3DU32FloatGrad:
   3187     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
   3188     break;
   3189   case NVPTXISD::TexCubeFloatFloat:
   3190     Opc = NVPTX::TEX_CUBE_F32_F32;
   3191     break;
   3192   case NVPTXISD::TexCubeFloatFloatLevel:
   3193     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
   3194     break;
   3195   case NVPTXISD::TexCubeS32Float:
   3196     Opc = NVPTX::TEX_CUBE_S32_F32;
   3197     break;
   3198   case NVPTXISD::TexCubeS32FloatLevel:
   3199     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
   3200     break;
   3201   case NVPTXISD::TexCubeU32Float:
   3202     Opc = NVPTX::TEX_CUBE_U32_F32;
   3203     break;
   3204   case NVPTXISD::TexCubeU32FloatLevel:
   3205     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
   3206     break;
   3207   case NVPTXISD::TexCubeArrayFloatFloat:
   3208     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
   3209     break;
   3210   case NVPTXISD::TexCubeArrayFloatFloatLevel:
   3211     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
   3212     break;
   3213   case NVPTXISD::TexCubeArrayS32Float:
   3214     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
   3215     break;
   3216   case NVPTXISD::TexCubeArrayS32FloatLevel:
   3217     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
   3218     break;
   3219   case NVPTXISD::TexCubeArrayU32Float:
   3220     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
   3221     break;
   3222   case NVPTXISD::TexCubeArrayU32FloatLevel:
   3223     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
   3224     break;
   3225   case NVPTXISD::Tld4R2DFloatFloat:
   3226     Opc = NVPTX::TLD4_R_2D_F32_F32;
   3227     break;
   3228   case NVPTXISD::Tld4G2DFloatFloat:
   3229     Opc = NVPTX::TLD4_G_2D_F32_F32;
   3230     break;
   3231   case NVPTXISD::Tld4B2DFloatFloat:
   3232     Opc = NVPTX::TLD4_B_2D_F32_F32;
   3233     break;
   3234   case NVPTXISD::Tld4A2DFloatFloat:
   3235     Opc = NVPTX::TLD4_A_2D_F32_F32;
   3236     break;
   3237   case NVPTXISD::Tld4R2DS64Float:
   3238     Opc = NVPTX::TLD4_R_2D_S32_F32;
   3239     break;
   3240   case NVPTXISD::Tld4G2DS64Float:
   3241     Opc = NVPTX::TLD4_G_2D_S32_F32;
   3242     break;
   3243   case NVPTXISD::Tld4B2DS64Float:
   3244     Opc = NVPTX::TLD4_B_2D_S32_F32;
   3245     break;
   3246   case NVPTXISD::Tld4A2DS64Float:
   3247     Opc = NVPTX::TLD4_A_2D_S32_F32;
   3248     break;
   3249   case NVPTXISD::Tld4R2DU64Float:
   3250     Opc = NVPTX::TLD4_R_2D_U32_F32;
   3251     break;
   3252   case NVPTXISD::Tld4G2DU64Float:
   3253     Opc = NVPTX::TLD4_G_2D_U32_F32;
   3254     break;
   3255   case NVPTXISD::Tld4B2DU64Float:
   3256     Opc = NVPTX::TLD4_B_2D_U32_F32;
   3257     break;
   3258   case NVPTXISD::Tld4A2DU64Float:
   3259     Opc = NVPTX::TLD4_A_2D_U32_F32;
   3260     break;
   3261   case NVPTXISD::TexUnified1DFloatS32:
   3262     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
   3263     break;
   3264   case NVPTXISD::TexUnified1DFloatFloat:
   3265     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
   3266     break;
   3267   case NVPTXISD::TexUnified1DFloatFloatLevel:
   3268     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
   3269     break;
   3270   case NVPTXISD::TexUnified1DFloatFloatGrad:
   3271     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
   3272     break;
   3273   case NVPTXISD::TexUnified1DS32S32:
   3274     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
   3275     break;
   3276   case NVPTXISD::TexUnified1DS32Float:
   3277     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
   3278     break;
   3279   case NVPTXISD::TexUnified1DS32FloatLevel:
   3280     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
   3281     break;
   3282   case NVPTXISD::TexUnified1DS32FloatGrad:
   3283     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
   3284     break;
   3285   case NVPTXISD::TexUnified1DU32S32:
   3286     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
   3287     break;
   3288   case NVPTXISD::TexUnified1DU32Float:
   3289     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
   3290     break;
   3291   case NVPTXISD::TexUnified1DU32FloatLevel:
   3292     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
   3293     break;
   3294   case NVPTXISD::TexUnified1DU32FloatGrad:
   3295     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
   3296     break;
   3297   case NVPTXISD::TexUnified1DArrayFloatS32:
   3298     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
   3299     break;
   3300   case NVPTXISD::TexUnified1DArrayFloatFloat:
   3301     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
   3302     break;
   3303   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
   3304     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
   3305     break;
   3306   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
   3307     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
   3308     break;
   3309   case NVPTXISD::TexUnified1DArrayS32S32:
   3310     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
   3311     break;
   3312   case NVPTXISD::TexUnified1DArrayS32Float:
   3313     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
   3314     break;
   3315   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
   3316     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
   3317     break;
   3318   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
   3319     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
   3320     break;
   3321   case NVPTXISD::TexUnified1DArrayU32S32:
   3322     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
   3323     break;
   3324   case NVPTXISD::TexUnified1DArrayU32Float:
   3325     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
   3326     break;
   3327   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
   3328     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
   3329     break;
   3330   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
   3331     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
   3332     break;
   3333   case NVPTXISD::TexUnified2DFloatS32:
   3334     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
   3335     break;
   3336   case NVPTXISD::TexUnified2DFloatFloat:
   3337     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
   3338     break;
   3339   case NVPTXISD::TexUnified2DFloatFloatLevel:
   3340     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
   3341     break;
   3342   case NVPTXISD::TexUnified2DFloatFloatGrad:
   3343     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
   3344     break;
   3345   case NVPTXISD::TexUnified2DS32S32:
   3346     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
   3347     break;
   3348   case NVPTXISD::TexUnified2DS32Float:
   3349     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
   3350     break;
   3351   case NVPTXISD::TexUnified2DS32FloatLevel:
   3352     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
   3353     break;
   3354   case NVPTXISD::TexUnified2DS32FloatGrad:
   3355     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
   3356     break;
   3357   case NVPTXISD::TexUnified2DU32S32:
   3358     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
   3359     break;
   3360   case NVPTXISD::TexUnified2DU32Float:
   3361     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
   3362     break;
   3363   case NVPTXISD::TexUnified2DU32FloatLevel:
   3364     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
   3365     break;
   3366   case NVPTXISD::TexUnified2DU32FloatGrad:
   3367     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
   3368     break;
   3369   case NVPTXISD::TexUnified2DArrayFloatS32:
   3370     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
   3371     break;
   3372   case NVPTXISD::TexUnified2DArrayFloatFloat:
   3373     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
   3374     break;
   3375   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
   3376     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
   3377     break;
   3378   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
   3379     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
   3380     break;
   3381   case NVPTXISD::TexUnified2DArrayS32S32:
   3382     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
   3383     break;
   3384   case NVPTXISD::TexUnified2DArrayS32Float:
   3385     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
   3386     break;
   3387   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
   3388     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
   3389     break;
   3390   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
   3391     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
   3392     break;
   3393   case NVPTXISD::TexUnified2DArrayU32S32:
   3394     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
   3395     break;
   3396   case NVPTXISD::TexUnified2DArrayU32Float:
   3397     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
   3398     break;
   3399   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
   3400     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
   3401     break;
   3402   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
   3403     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
   3404     break;
   3405   case NVPTXISD::TexUnified3DFloatS32:
   3406     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
   3407     break;
   3408   case NVPTXISD::TexUnified3DFloatFloat:
   3409     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
   3410     break;
   3411   case NVPTXISD::TexUnified3DFloatFloatLevel:
   3412     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
   3413     break;
   3414   case NVPTXISD::TexUnified3DFloatFloatGrad:
   3415     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
   3416     break;
   3417   case NVPTXISD::TexUnified3DS32S32:
   3418     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
   3419     break;
   3420   case NVPTXISD::TexUnified3DS32Float:
   3421     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
   3422     break;
   3423   case NVPTXISD::TexUnified3DS32FloatLevel:
   3424     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
   3425     break;
   3426   case NVPTXISD::TexUnified3DS32FloatGrad:
   3427     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
   3428     break;
   3429   case NVPTXISD::TexUnified3DU32S32:
   3430     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
   3431     break;
   3432   case NVPTXISD::TexUnified3DU32Float:
   3433     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
   3434     break;
   3435   case NVPTXISD::TexUnified3DU32FloatLevel:
   3436     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
   3437     break;
   3438   case NVPTXISD::TexUnified3DU32FloatGrad:
   3439     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
   3440     break;
   3441   case NVPTXISD::TexUnifiedCubeFloatFloat:
   3442     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
   3443     break;
   3444   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
   3445     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
   3446     break;
   3447   case NVPTXISD::TexUnifiedCubeS32Float:
   3448     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
   3449     break;
   3450   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
   3451     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
   3452     break;
   3453   case NVPTXISD::TexUnifiedCubeU32Float:
   3454     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
   3455     break;
   3456   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
   3457     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
   3458     break;
   3459   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
   3460     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
   3461     break;
   3462   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
   3463     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
   3464     break;
   3465   case NVPTXISD::TexUnifiedCubeArrayS32Float:
   3466     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
   3467     break;
   3468   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
   3469     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
   3470     break;
   3471   case NVPTXISD::TexUnifiedCubeArrayU32Float:
   3472     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
   3473     break;
   3474   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
   3475     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
   3476     break;
   3477   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
   3478     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
   3479     break;
   3480   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
   3481     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
   3482     break;
   3483   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
   3484     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
   3485     break;
   3486   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
   3487     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
   3488     break;
   3489   case NVPTXISD::Tld4UnifiedR2DS64Float:
   3490     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
   3491     break;
   3492   case NVPTXISD::Tld4UnifiedG2DS64Float:
   3493     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
   3494     break;
   3495   case NVPTXISD::Tld4UnifiedB2DS64Float:
   3496     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
   3497     break;
   3498   case NVPTXISD::Tld4UnifiedA2DS64Float:
   3499     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
   3500     break;
   3501   case NVPTXISD::Tld4UnifiedR2DU64Float:
   3502     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
   3503     break;
   3504   case NVPTXISD::Tld4UnifiedG2DU64Float:
   3505     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
   3506     break;
   3507   case NVPTXISD::Tld4UnifiedB2DU64Float:
   3508     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
   3509     break;
   3510   case NVPTXISD::Tld4UnifiedA2DU64Float:
   3511     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
   3512     break;
   3513   }
   3514 
   3515   // Copy over operands
   3516   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
   3517     Ops.push_back(N->getOperand(i));
   3518   }
   3519 
   3520   Ops.push_back(Chain);
   3521   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   3522   return Ret;
   3523 }
   3524 
   3525 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
   3526   SDValue Chain = N->getOperand(0);
   3527   SDValue TexHandle = N->getOperand(1);
   3528   SDNode *Ret = nullptr;
   3529   unsigned Opc = 0;
   3530   SmallVector<SDValue, 8> Ops;
   3531   switch (N->getOpcode()) {
   3532   default: return nullptr;
   3533   case NVPTXISD::Suld1DI8Clamp:
   3534     Opc = NVPTX::SULD_1D_I8_CLAMP;
   3535     Ops.push_back(TexHandle);
   3536     Ops.push_back(N->getOperand(2));
   3537     Ops.push_back(Chain);
   3538     break;
   3539   case NVPTXISD::Suld1DI16Clamp:
   3540     Opc = NVPTX::SULD_1D_I16_CLAMP;
   3541     Ops.push_back(TexHandle);
   3542     Ops.push_back(N->getOperand(2));
   3543     Ops.push_back(Chain);
   3544     break;
   3545   case NVPTXISD::Suld1DI32Clamp:
   3546     Opc = NVPTX::SULD_1D_I32_CLAMP;
   3547     Ops.push_back(TexHandle);
   3548     Ops.push_back(N->getOperand(2));
   3549     Ops.push_back(Chain);
   3550     break;
   3551   case NVPTXISD::Suld1DI64Clamp:
   3552     Opc = NVPTX::SULD_1D_I64_CLAMP;
   3553     Ops.push_back(TexHandle);
   3554     Ops.push_back(N->getOperand(2));
   3555     Ops.push_back(Chain);
   3556     break;
   3557   case NVPTXISD::Suld1DV2I8Clamp:
   3558     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
   3559     Ops.push_back(TexHandle);
   3560     Ops.push_back(N->getOperand(2));
   3561     Ops.push_back(Chain);
   3562     break;
   3563   case NVPTXISD::Suld1DV2I16Clamp:
   3564     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
   3565     Ops.push_back(TexHandle);
   3566     Ops.push_back(N->getOperand(2));
   3567     Ops.push_back(Chain);
   3568     break;
   3569   case NVPTXISD::Suld1DV2I32Clamp:
   3570     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
   3571     Ops.push_back(TexHandle);
   3572     Ops.push_back(N->getOperand(2));
   3573     Ops.push_back(Chain);
   3574     break;
   3575   case NVPTXISD::Suld1DV2I64Clamp:
   3576     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
   3577     Ops.push_back(TexHandle);
   3578     Ops.push_back(N->getOperand(2));
   3579     Ops.push_back(Chain);
   3580     break;
   3581   case NVPTXISD::Suld1DV4I8Clamp:
   3582     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
   3583     Ops.push_back(TexHandle);
   3584     Ops.push_back(N->getOperand(2));
   3585     Ops.push_back(Chain);
   3586     break;
   3587   case NVPTXISD::Suld1DV4I16Clamp:
   3588     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
   3589     Ops.push_back(TexHandle);
   3590     Ops.push_back(N->getOperand(2));
   3591     Ops.push_back(Chain);
   3592     break;
   3593   case NVPTXISD::Suld1DV4I32Clamp:
   3594     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
   3595     Ops.push_back(TexHandle);
   3596     Ops.push_back(N->getOperand(2));
   3597     Ops.push_back(Chain);
   3598     break;
   3599   case NVPTXISD::Suld1DArrayI8Clamp:
   3600     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
   3601     Ops.push_back(TexHandle);
   3602     Ops.push_back(N->getOperand(2));
   3603     Ops.push_back(N->getOperand(3));
   3604     Ops.push_back(Chain);
   3605     break;
   3606   case NVPTXISD::Suld1DArrayI16Clamp:
   3607     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
   3608     Ops.push_back(TexHandle);
   3609     Ops.push_back(N->getOperand(2));
   3610     Ops.push_back(N->getOperand(3));
   3611     Ops.push_back(Chain);
   3612     break;
   3613   case NVPTXISD::Suld1DArrayI32Clamp:
   3614     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
   3615     Ops.push_back(TexHandle);
   3616     Ops.push_back(N->getOperand(2));
   3617     Ops.push_back(N->getOperand(3));
   3618     Ops.push_back(Chain);
   3619     break;
   3620   case NVPTXISD::Suld1DArrayI64Clamp:
   3621     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
   3622     Ops.push_back(TexHandle);
   3623     Ops.push_back(N->getOperand(2));
   3624     Ops.push_back(N->getOperand(3));
   3625     Ops.push_back(Chain);
   3626     break;
   3627   case NVPTXISD::Suld1DArrayV2I8Clamp:
   3628     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
   3629     Ops.push_back(TexHandle);
   3630     Ops.push_back(N->getOperand(2));
   3631     Ops.push_back(N->getOperand(3));
   3632     Ops.push_back(Chain);
   3633     break;
   3634   case NVPTXISD::Suld1DArrayV2I16Clamp:
   3635     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
   3636     Ops.push_back(TexHandle);
   3637     Ops.push_back(N->getOperand(2));
   3638     Ops.push_back(N->getOperand(3));
   3639     Ops.push_back(Chain);
   3640     break;
   3641   case NVPTXISD::Suld1DArrayV2I32Clamp:
   3642     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
   3643     Ops.push_back(TexHandle);
   3644     Ops.push_back(N->getOperand(2));
   3645     Ops.push_back(N->getOperand(3));
   3646     Ops.push_back(Chain);
   3647     break;
   3648   case NVPTXISD::Suld1DArrayV2I64Clamp:
   3649     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
   3650     Ops.push_back(TexHandle);
   3651     Ops.push_back(N->getOperand(2));
   3652     Ops.push_back(N->getOperand(3));
   3653     Ops.push_back(Chain);
   3654     break;
   3655   case NVPTXISD::Suld1DArrayV4I8Clamp:
   3656     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
   3657     Ops.push_back(TexHandle);
   3658     Ops.push_back(N->getOperand(2));
   3659     Ops.push_back(N->getOperand(3));
   3660     Ops.push_back(Chain);
   3661     break;
   3662   case NVPTXISD::Suld1DArrayV4I16Clamp:
   3663     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
   3664     Ops.push_back(TexHandle);
   3665     Ops.push_back(N->getOperand(2));
   3666     Ops.push_back(N->getOperand(3));
   3667     Ops.push_back(Chain);
   3668     break;
   3669   case NVPTXISD::Suld1DArrayV4I32Clamp:
   3670     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
   3671     Ops.push_back(TexHandle);
   3672     Ops.push_back(N->getOperand(2));
   3673     Ops.push_back(N->getOperand(3));
   3674     Ops.push_back(Chain);
   3675     break;
   3676   case NVPTXISD::Suld2DI8Clamp:
   3677     Opc = NVPTX::SULD_2D_I8_CLAMP;
   3678     Ops.push_back(TexHandle);
   3679     Ops.push_back(N->getOperand(2));
   3680     Ops.push_back(N->getOperand(3));
   3681     Ops.push_back(Chain);
   3682     break;
   3683   case NVPTXISD::Suld2DI16Clamp:
   3684     Opc = NVPTX::SULD_2D_I16_CLAMP;
   3685     Ops.push_back(TexHandle);
   3686     Ops.push_back(N->getOperand(2));
   3687     Ops.push_back(N->getOperand(3));
   3688     Ops.push_back(Chain);
   3689     break;
   3690   case NVPTXISD::Suld2DI32Clamp:
   3691     Opc = NVPTX::SULD_2D_I32_CLAMP;
   3692     Ops.push_back(TexHandle);
   3693     Ops.push_back(N->getOperand(2));
   3694     Ops.push_back(N->getOperand(3));
   3695     Ops.push_back(Chain);
   3696     break;
   3697   case NVPTXISD::Suld2DI64Clamp:
   3698     Opc = NVPTX::SULD_2D_I64_CLAMP;
   3699     Ops.push_back(TexHandle);
   3700     Ops.push_back(N->getOperand(2));
   3701     Ops.push_back(N->getOperand(3));
   3702     Ops.push_back(Chain);
   3703     break;
   3704   case NVPTXISD::Suld2DV2I8Clamp:
   3705     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
   3706     Ops.push_back(TexHandle);
   3707     Ops.push_back(N->getOperand(2));
   3708     Ops.push_back(N->getOperand(3));
   3709     Ops.push_back(Chain);
   3710     break;
   3711   case NVPTXISD::Suld2DV2I16Clamp:
   3712     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
   3713     Ops.push_back(TexHandle);
   3714     Ops.push_back(N->getOperand(2));
   3715     Ops.push_back(N->getOperand(3));
   3716     Ops.push_back(Chain);
   3717     break;
   3718   case NVPTXISD::Suld2DV2I32Clamp:
   3719     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
   3720     Ops.push_back(TexHandle);
   3721     Ops.push_back(N->getOperand(2));
   3722     Ops.push_back(N->getOperand(3));
   3723     Ops.push_back(Chain);
   3724     break;
   3725   case NVPTXISD::Suld2DV2I64Clamp:
   3726     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
   3727     Ops.push_back(TexHandle);
   3728     Ops.push_back(N->getOperand(2));
   3729     Ops.push_back(N->getOperand(3));
   3730     Ops.push_back(Chain);
   3731     break;
   3732   case NVPTXISD::Suld2DV4I8Clamp:
   3733     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
   3734     Ops.push_back(TexHandle);
   3735     Ops.push_back(N->getOperand(2));
   3736     Ops.push_back(N->getOperand(3));
   3737     Ops.push_back(Chain);
   3738     break;
   3739   case NVPTXISD::Suld2DV4I16Clamp:
   3740     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
   3741     Ops.push_back(TexHandle);
   3742     Ops.push_back(N->getOperand(2));
   3743     Ops.push_back(N->getOperand(3));
   3744     Ops.push_back(Chain);
   3745     break;
   3746   case NVPTXISD::Suld2DV4I32Clamp:
   3747     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
   3748     Ops.push_back(TexHandle);
   3749     Ops.push_back(N->getOperand(2));
   3750     Ops.push_back(N->getOperand(3));
   3751     Ops.push_back(Chain);
   3752     break;
   3753   case NVPTXISD::Suld2DArrayI8Clamp:
   3754     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
   3755     Ops.push_back(TexHandle);
   3756     Ops.push_back(N->getOperand(2));
   3757     Ops.push_back(N->getOperand(3));
   3758     Ops.push_back(N->getOperand(4));
   3759     Ops.push_back(Chain);
   3760     break;
   3761   case NVPTXISD::Suld2DArrayI16Clamp:
   3762     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
   3763     Ops.push_back(TexHandle);
   3764     Ops.push_back(N->getOperand(2));
   3765     Ops.push_back(N->getOperand(3));
   3766     Ops.push_back(N->getOperand(4));
   3767     Ops.push_back(Chain);
   3768     break;
   3769   case NVPTXISD::Suld2DArrayI32Clamp:
   3770     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
   3771     Ops.push_back(TexHandle);
   3772     Ops.push_back(N->getOperand(2));
   3773     Ops.push_back(N->getOperand(3));
   3774     Ops.push_back(N->getOperand(4));
   3775     Ops.push_back(Chain);
   3776     break;
   3777   case NVPTXISD::Suld2DArrayI64Clamp:
   3778     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
   3779     Ops.push_back(TexHandle);
   3780     Ops.push_back(N->getOperand(2));
   3781     Ops.push_back(N->getOperand(3));
   3782     Ops.push_back(N->getOperand(4));
   3783     Ops.push_back(Chain);
   3784     break;
   3785   case NVPTXISD::Suld2DArrayV2I8Clamp:
   3786     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
   3787     Ops.push_back(TexHandle);
   3788     Ops.push_back(N->getOperand(2));
   3789     Ops.push_back(N->getOperand(3));
   3790     Ops.push_back(N->getOperand(4));
   3791     Ops.push_back(Chain);
   3792     break;
   3793   case NVPTXISD::Suld2DArrayV2I16Clamp:
   3794     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
   3795     Ops.push_back(TexHandle);
   3796     Ops.push_back(N->getOperand(2));
   3797     Ops.push_back(N->getOperand(3));
   3798     Ops.push_back(N->getOperand(4));
   3799     Ops.push_back(Chain);
   3800     break;
   3801   case NVPTXISD::Suld2DArrayV2I32Clamp:
   3802     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
   3803     Ops.push_back(TexHandle);
   3804     Ops.push_back(N->getOperand(2));
   3805     Ops.push_back(N->getOperand(3));
   3806     Ops.push_back(N->getOperand(4));
   3807     Ops.push_back(Chain);
   3808     break;
   3809   case NVPTXISD::Suld2DArrayV2I64Clamp:
   3810     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
   3811     Ops.push_back(TexHandle);
   3812     Ops.push_back(N->getOperand(2));
   3813     Ops.push_back(N->getOperand(3));
   3814     Ops.push_back(N->getOperand(4));
   3815     Ops.push_back(Chain);
   3816     break;
   3817   case NVPTXISD::Suld2DArrayV4I8Clamp:
   3818     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
   3819     Ops.push_back(TexHandle);
   3820     Ops.push_back(N->getOperand(2));
   3821     Ops.push_back(N->getOperand(3));
   3822     Ops.push_back(N->getOperand(4));
   3823     Ops.push_back(Chain);
   3824     break;
   3825   case NVPTXISD::Suld2DArrayV4I16Clamp:
   3826     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
   3827     Ops.push_back(TexHandle);
   3828     Ops.push_back(N->getOperand(2));
   3829     Ops.push_back(N->getOperand(3));
   3830     Ops.push_back(N->getOperand(4));
   3831     Ops.push_back(Chain);
   3832     break;
   3833   case NVPTXISD::Suld2DArrayV4I32Clamp:
   3834     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
   3835     Ops.push_back(TexHandle);
   3836     Ops.push_back(N->getOperand(2));
   3837     Ops.push_back(N->getOperand(3));
   3838     Ops.push_back(N->getOperand(4));
   3839     Ops.push_back(Chain);
   3840     break;
   3841   case NVPTXISD::Suld3DI8Clamp:
   3842     Opc = NVPTX::SULD_3D_I8_CLAMP;
   3843     Ops.push_back(TexHandle);
   3844     Ops.push_back(N->getOperand(2));
   3845     Ops.push_back(N->getOperand(3));
   3846     Ops.push_back(N->getOperand(4));
   3847     Ops.push_back(Chain);
   3848     break;
   3849   case NVPTXISD::Suld3DI16Clamp:
   3850     Opc = NVPTX::SULD_3D_I16_CLAMP;
   3851     Ops.push_back(TexHandle);
   3852     Ops.push_back(N->getOperand(2));
   3853     Ops.push_back(N->getOperand(3));
   3854     Ops.push_back(N->getOperand(4));
   3855     Ops.push_back(Chain);
   3856     break;
   3857   case NVPTXISD::Suld3DI32Clamp:
   3858     Opc = NVPTX::SULD_3D_I32_CLAMP;
   3859     Ops.push_back(TexHandle);
   3860     Ops.push_back(N->getOperand(2));
   3861     Ops.push_back(N->getOperand(3));
   3862     Ops.push_back(N->getOperand(4));
   3863     Ops.push_back(Chain);
   3864     break;
   3865   case NVPTXISD::Suld3DI64Clamp:
   3866     Opc = NVPTX::SULD_3D_I64_CLAMP;
   3867     Ops.push_back(TexHandle);
   3868     Ops.push_back(N->getOperand(2));
   3869     Ops.push_back(N->getOperand(3));
   3870     Ops.push_back(N->getOperand(4));
   3871     Ops.push_back(Chain);
   3872     break;
   3873   case NVPTXISD::Suld3DV2I8Clamp:
   3874     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
   3875     Ops.push_back(TexHandle);
   3876     Ops.push_back(N->getOperand(2));
   3877     Ops.push_back(N->getOperand(3));
   3878     Ops.push_back(N->getOperand(4));
   3879     Ops.push_back(Chain);
   3880     break;
   3881   case NVPTXISD::Suld3DV2I16Clamp:
   3882     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
   3883     Ops.push_back(TexHandle);
   3884     Ops.push_back(N->getOperand(2));
   3885     Ops.push_back(N->getOperand(3));
   3886     Ops.push_back(N->getOperand(4));
   3887     Ops.push_back(Chain);
   3888     break;
   3889   case NVPTXISD::Suld3DV2I32Clamp:
   3890     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
   3891     Ops.push_back(TexHandle);
   3892     Ops.push_back(N->getOperand(2));
   3893     Ops.push_back(N->getOperand(3));
   3894     Ops.push_back(N->getOperand(4));
   3895     Ops.push_back(Chain);
   3896     break;
   3897   case NVPTXISD::Suld3DV2I64Clamp:
   3898     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
   3899     Ops.push_back(TexHandle);
   3900     Ops.push_back(N->getOperand(2));
   3901     Ops.push_back(N->getOperand(3));
   3902     Ops.push_back(N->getOperand(4));
   3903     Ops.push_back(Chain);
   3904     break;
   3905   case NVPTXISD::Suld3DV4I8Clamp:
   3906     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
   3907     Ops.push_back(TexHandle);
   3908     Ops.push_back(N->getOperand(2));
   3909     Ops.push_back(N->getOperand(3));
   3910     Ops.push_back(N->getOperand(4));
   3911     Ops.push_back(Chain);
   3912     break;
   3913   case NVPTXISD::Suld3DV4I16Clamp:
   3914     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
   3915     Ops.push_back(TexHandle);
   3916     Ops.push_back(N->getOperand(2));
   3917     Ops.push_back(N->getOperand(3));
   3918     Ops.push_back(N->getOperand(4));
   3919     Ops.push_back(Chain);
   3920     break;
   3921   case NVPTXISD::Suld3DV4I32Clamp:
   3922     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
   3923     Ops.push_back(TexHandle);
   3924     Ops.push_back(N->getOperand(2));
   3925     Ops.push_back(N->getOperand(3));
   3926     Ops.push_back(N->getOperand(4));
   3927     Ops.push_back(Chain);
   3928     break;
   3929   case NVPTXISD::Suld1DI8Trap:
   3930     Opc = NVPTX::SULD_1D_I8_TRAP;
   3931     Ops.push_back(TexHandle);
   3932     Ops.push_back(N->getOperand(2));
   3933     Ops.push_back(Chain);
   3934     break;
   3935   case NVPTXISD::Suld1DI16Trap:
   3936     Opc = NVPTX::SULD_1D_I16_TRAP;
   3937     Ops.push_back(TexHandle);
   3938     Ops.push_back(N->getOperand(2));
   3939     Ops.push_back(Chain);
   3940     break;
   3941   case NVPTXISD::Suld1DI32Trap:
   3942     Opc = NVPTX::SULD_1D_I32_TRAP;
   3943     Ops.push_back(TexHandle);
   3944     Ops.push_back(N->getOperand(2));
   3945     Ops.push_back(Chain);
   3946     break;
   3947   case NVPTXISD::Suld1DI64Trap:
   3948     Opc = NVPTX::SULD_1D_I64_TRAP;
   3949     Ops.push_back(TexHandle);
   3950     Ops.push_back(N->getOperand(2));
   3951     Ops.push_back(Chain);
   3952     break;
   3953   case NVPTXISD::Suld1DV2I8Trap:
   3954     Opc = NVPTX::SULD_1D_V2I8_TRAP;
   3955     Ops.push_back(TexHandle);
   3956     Ops.push_back(N->getOperand(2));
   3957     Ops.push_back(Chain);
   3958     break;
   3959   case NVPTXISD::Suld1DV2I16Trap:
   3960     Opc = NVPTX::SULD_1D_V2I16_TRAP;
   3961     Ops.push_back(TexHandle);
   3962     Ops.push_back(N->getOperand(2));
   3963     Ops.push_back(Chain);
   3964     break;
   3965   case NVPTXISD::Suld1DV2I32Trap:
   3966     Opc = NVPTX::SULD_1D_V2I32_TRAP;
   3967     Ops.push_back(TexHandle);
   3968     Ops.push_back(N->getOperand(2));
   3969     Ops.push_back(Chain);
   3970     break;
   3971   case NVPTXISD::Suld1DV2I64Trap:
   3972     Opc = NVPTX::SULD_1D_V2I64_TRAP;
   3973     Ops.push_back(TexHandle);
   3974     Ops.push_back(N->getOperand(2));
   3975     Ops.push_back(Chain);
   3976     break;
   3977   case NVPTXISD::Suld1DV4I8Trap:
   3978     Opc = NVPTX::SULD_1D_V4I8_TRAP;
   3979     Ops.push_back(TexHandle);
   3980     Ops.push_back(N->getOperand(2));
   3981     Ops.push_back(Chain);
   3982     break;
   3983   case NVPTXISD::Suld1DV4I16Trap:
   3984     Opc = NVPTX::SULD_1D_V4I16_TRAP;
   3985     Ops.push_back(TexHandle);
   3986     Ops.push_back(N->getOperand(2));
   3987     Ops.push_back(Chain);
   3988     break;
   3989   case NVPTXISD::Suld1DV4I32Trap:
   3990     Opc = NVPTX::SULD_1D_V4I32_TRAP;
   3991     Ops.push_back(TexHandle);
   3992     Ops.push_back(N->getOperand(2));
   3993     Ops.push_back(Chain);
   3994     break;
   3995   case NVPTXISD::Suld1DArrayI8Trap:
   3996     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
   3997     Ops.push_back(TexHandle);
   3998     Ops.push_back(N->getOperand(2));
   3999     Ops.push_back(N->getOperand(3));
   4000     Ops.push_back(Chain);
   4001     break;
   4002   case NVPTXISD::Suld1DArrayI16Trap:
   4003     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
   4004     Ops.push_back(TexHandle);
   4005     Ops.push_back(N->getOperand(2));
   4006     Ops.push_back(N->getOperand(3));
   4007     Ops.push_back(Chain);
   4008     break;
   4009   case NVPTXISD::Suld1DArrayI32Trap:
   4010     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
   4011     Ops.push_back(TexHandle);
   4012     Ops.push_back(N->getOperand(2));
   4013     Ops.push_back(N->getOperand(3));
   4014     Ops.push_back(Chain);
   4015     break;
   4016   case NVPTXISD::Suld1DArrayI64Trap:
   4017     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
   4018     Ops.push_back(TexHandle);
   4019     Ops.push_back(N->getOperand(2));
   4020     Ops.push_back(N->getOperand(3));
   4021     Ops.push_back(Chain);
   4022     break;
   4023   case NVPTXISD::Suld1DArrayV2I8Trap:
   4024     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
   4025     Ops.push_back(TexHandle);
   4026     Ops.push_back(N->getOperand(2));
   4027     Ops.push_back(N->getOperand(3));
   4028     Ops.push_back(Chain);
   4029     break;
   4030   case NVPTXISD::Suld1DArrayV2I16Trap:
   4031     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
   4032     Ops.push_back(TexHandle);
   4033     Ops.push_back(N->getOperand(2));
   4034     Ops.push_back(N->getOperand(3));
   4035     Ops.push_back(Chain);
   4036     break;
   4037   case NVPTXISD::Suld1DArrayV2I32Trap:
   4038     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
   4039     Ops.push_back(TexHandle);
   4040     Ops.push_back(N->getOperand(2));
   4041     Ops.push_back(N->getOperand(3));
   4042     Ops.push_back(Chain);
   4043     break;
   4044   case NVPTXISD::Suld1DArrayV2I64Trap:
   4045     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
   4046     Ops.push_back(TexHandle);
   4047     Ops.push_back(N->getOperand(2));
   4048     Ops.push_back(N->getOperand(3));
   4049     Ops.push_back(Chain);
   4050     break;
   4051   case NVPTXISD::Suld1DArrayV4I8Trap:
   4052     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
   4053     Ops.push_back(TexHandle);
   4054     Ops.push_back(N->getOperand(2));
   4055     Ops.push_back(N->getOperand(3));
   4056     Ops.push_back(Chain);
   4057     break;
   4058   case NVPTXISD::Suld1DArrayV4I16Trap:
   4059     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
   4060     Ops.push_back(TexHandle);
   4061     Ops.push_back(N->getOperand(2));
   4062     Ops.push_back(N->getOperand(3));
   4063     Ops.push_back(Chain);
   4064     break;
   4065   case NVPTXISD::Suld1DArrayV4I32Trap:
   4066     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
   4067     Ops.push_back(TexHandle);
   4068     Ops.push_back(N->getOperand(2));
   4069     Ops.push_back(N->getOperand(3));
   4070     Ops.push_back(Chain);
   4071     break;
   4072   case NVPTXISD::Suld2DI8Trap:
   4073     Opc = NVPTX::SULD_2D_I8_TRAP;
   4074     Ops.push_back(TexHandle);
   4075     Ops.push_back(N->getOperand(2));
   4076     Ops.push_back(N->getOperand(3));
   4077     Ops.push_back(Chain);
   4078     break;
   4079   case NVPTXISD::Suld2DI16Trap:
   4080     Opc = NVPTX::SULD_2D_I16_TRAP;
   4081     Ops.push_back(TexHandle);
   4082     Ops.push_back(N->getOperand(2));
   4083     Ops.push_back(N->getOperand(3));
   4084     Ops.push_back(Chain);
   4085     break;
   4086   case NVPTXISD::Suld2DI32Trap:
   4087     Opc = NVPTX::SULD_2D_I32_TRAP;
   4088     Ops.push_back(TexHandle);
   4089     Ops.push_back(N->getOperand(2));
   4090     Ops.push_back(N->getOperand(3));
   4091     Ops.push_back(Chain);
   4092     break;
   4093   case NVPTXISD::Suld2DI64Trap:
   4094     Opc = NVPTX::SULD_2D_I64_TRAP;
   4095     Ops.push_back(TexHandle);
   4096     Ops.push_back(N->getOperand(2));
   4097     Ops.push_back(N->getOperand(3));
   4098     Ops.push_back(Chain);
   4099     break;
   4100   case NVPTXISD::Suld2DV2I8Trap:
   4101     Opc = NVPTX::SULD_2D_V2I8_TRAP;
   4102     Ops.push_back(TexHandle);
   4103     Ops.push_back(N->getOperand(2));
   4104     Ops.push_back(N->getOperand(3));
   4105     Ops.push_back(Chain);
   4106     break;
   4107   case NVPTXISD::Suld2DV2I16Trap:
   4108     Opc = NVPTX::SULD_2D_V2I16_TRAP;
   4109     Ops.push_back(TexHandle);
   4110     Ops.push_back(N->getOperand(2));
   4111     Ops.push_back(N->getOperand(3));
   4112     Ops.push_back(Chain);
   4113     break;
   4114   case NVPTXISD::Suld2DV2I32Trap:
   4115     Opc = NVPTX::SULD_2D_V2I32_TRAP;
   4116     Ops.push_back(TexHandle);
   4117     Ops.push_back(N->getOperand(2));
   4118     Ops.push_back(N->getOperand(3));
   4119     Ops.push_back(Chain);
   4120     break;
   4121   case NVPTXISD::Suld2DV2I64Trap:
   4122     Opc = NVPTX::SULD_2D_V2I64_TRAP;
   4123     Ops.push_back(TexHandle);
   4124     Ops.push_back(N->getOperand(2));
   4125     Ops.push_back(N->getOperand(3));
   4126     Ops.push_back(Chain);
   4127     break;
   4128   case NVPTXISD::Suld2DV4I8Trap:
   4129     Opc = NVPTX::SULD_2D_V4I8_TRAP;
   4130     Ops.push_back(TexHandle);
   4131     Ops.push_back(N->getOperand(2));
   4132     Ops.push_back(N->getOperand(3));
   4133     Ops.push_back(Chain);
   4134     break;
   4135   case NVPTXISD::Suld2DV4I16Trap:
   4136     Opc = NVPTX::SULD_2D_V4I16_TRAP;
   4137     Ops.push_back(TexHandle);
   4138     Ops.push_back(N->getOperand(2));
   4139     Ops.push_back(N->getOperand(3));
   4140     Ops.push_back(Chain);
   4141     break;
   4142   case NVPTXISD::Suld2DV4I32Trap:
   4143     Opc = NVPTX::SULD_2D_V4I32_TRAP;
   4144     Ops.push_back(TexHandle);
   4145     Ops.push_back(N->getOperand(2));
   4146     Ops.push_back(N->getOperand(3));
   4147     Ops.push_back(Chain);
   4148     break;
   4149   case NVPTXISD::Suld2DArrayI8Trap:
   4150     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
   4151     Ops.push_back(TexHandle);
   4152     Ops.push_back(N->getOperand(2));
   4153     Ops.push_back(N->getOperand(3));
   4154     Ops.push_back(N->getOperand(4));
   4155     Ops.push_back(Chain);
   4156     break;
   4157   case NVPTXISD::Suld2DArrayI16Trap:
   4158     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
   4159     Ops.push_back(TexHandle);
   4160     Ops.push_back(N->getOperand(2));
   4161     Ops.push_back(N->getOperand(3));
   4162     Ops.push_back(N->getOperand(4));
   4163     Ops.push_back(Chain);
   4164     break;
   4165   case NVPTXISD::Suld2DArrayI32Trap:
   4166     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
   4167     Ops.push_back(TexHandle);
   4168     Ops.push_back(N->getOperand(2));
   4169     Ops.push_back(N->getOperand(3));
   4170     Ops.push_back(N->getOperand(4));
   4171     Ops.push_back(Chain);
   4172     break;
   4173   case NVPTXISD::Suld2DArrayI64Trap:
   4174     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
   4175     Ops.push_back(TexHandle);
   4176     Ops.push_back(N->getOperand(2));
   4177     Ops.push_back(N->getOperand(3));
   4178     Ops.push_back(N->getOperand(4));
   4179     Ops.push_back(Chain);
   4180     break;
   4181   case NVPTXISD::Suld2DArrayV2I8Trap:
   4182     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
   4183     Ops.push_back(TexHandle);
   4184     Ops.push_back(N->getOperand(2));
   4185     Ops.push_back(N->getOperand(3));
   4186     Ops.push_back(N->getOperand(4));
   4187     Ops.push_back(Chain);
   4188     break;
   4189   case NVPTXISD::Suld2DArrayV2I16Trap:
   4190     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
   4191     Ops.push_back(TexHandle);
   4192     Ops.push_back(N->getOperand(2));
   4193     Ops.push_back(N->getOperand(3));
   4194     Ops.push_back(N->getOperand(4));
   4195     Ops.push_back(Chain);
   4196     break;
   4197   case NVPTXISD::Suld2DArrayV2I32Trap:
   4198     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
   4199     Ops.push_back(TexHandle);
   4200     Ops.push_back(N->getOperand(2));
   4201     Ops.push_back(N->getOperand(3));
   4202     Ops.push_back(N->getOperand(4));
   4203     Ops.push_back(Chain);
   4204     break;
   4205   case NVPTXISD::Suld2DArrayV2I64Trap:
   4206     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
   4207     Ops.push_back(TexHandle);
   4208     Ops.push_back(N->getOperand(2));
   4209     Ops.push_back(N->getOperand(3));
   4210     Ops.push_back(N->getOperand(4));
   4211     Ops.push_back(Chain);
   4212     break;
   4213   case NVPTXISD::Suld2DArrayV4I8Trap:
   4214     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
   4215     Ops.push_back(TexHandle);
   4216     Ops.push_back(N->getOperand(2));
   4217     Ops.push_back(N->getOperand(3));
   4218     Ops.push_back(N->getOperand(4));
   4219     Ops.push_back(Chain);
   4220     break;
   4221   case NVPTXISD::Suld2DArrayV4I16Trap:
   4222     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
   4223     Ops.push_back(TexHandle);
   4224     Ops.push_back(N->getOperand(2));
   4225     Ops.push_back(N->getOperand(3));
   4226     Ops.push_back(N->getOperand(4));
   4227     Ops.push_back(Chain);
   4228     break;
   4229   case NVPTXISD::Suld2DArrayV4I32Trap:
   4230     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
   4231     Ops.push_back(TexHandle);
   4232     Ops.push_back(N->getOperand(2));
   4233     Ops.push_back(N->getOperand(3));
   4234     Ops.push_back(N->getOperand(4));
   4235     Ops.push_back(Chain);
   4236     break;
   4237   case NVPTXISD::Suld3DI8Trap:
   4238     Opc = NVPTX::SULD_3D_I8_TRAP;
   4239     Ops.push_back(TexHandle);
   4240     Ops.push_back(N->getOperand(2));
   4241     Ops.push_back(N->getOperand(3));
   4242     Ops.push_back(N->getOperand(4));
   4243     Ops.push_back(Chain);
   4244     break;
   4245   case NVPTXISD::Suld3DI16Trap:
   4246     Opc = NVPTX::SULD_3D_I16_TRAP;
   4247     Ops.push_back(TexHandle);
   4248     Ops.push_back(N->getOperand(2));
   4249     Ops.push_back(N->getOperand(3));
   4250     Ops.push_back(N->getOperand(4));
   4251     Ops.push_back(Chain);
   4252     break;
   4253   case NVPTXISD::Suld3DI32Trap:
   4254     Opc = NVPTX::SULD_3D_I32_TRAP;
   4255     Ops.push_back(TexHandle);
   4256     Ops.push_back(N->getOperand(2));
   4257     Ops.push_back(N->getOperand(3));
   4258     Ops.push_back(N->getOperand(4));
   4259     Ops.push_back(Chain);
   4260     break;
   4261   case NVPTXISD::Suld3DI64Trap:
   4262     Opc = NVPTX::SULD_3D_I64_TRAP;
   4263     Ops.push_back(TexHandle);
   4264     Ops.push_back(N->getOperand(2));
   4265     Ops.push_back(N->getOperand(3));
   4266     Ops.push_back(N->getOperand(4));
   4267     Ops.push_back(Chain);
   4268     break;
   4269   case NVPTXISD::Suld3DV2I8Trap:
   4270     Opc = NVPTX::SULD_3D_V2I8_TRAP;
   4271     Ops.push_back(TexHandle);
   4272     Ops.push_back(N->getOperand(2));
   4273     Ops.push_back(N->getOperand(3));
   4274     Ops.push_back(N->getOperand(4));
   4275     Ops.push_back(Chain);
   4276     break;
   4277   case NVPTXISD::Suld3DV2I16Trap:
   4278     Opc = NVPTX::SULD_3D_V2I16_TRAP;
   4279     Ops.push_back(TexHandle);
   4280     Ops.push_back(N->getOperand(2));
   4281     Ops.push_back(N->getOperand(3));
   4282     Ops.push_back(N->getOperand(4));
   4283     Ops.push_back(Chain);
   4284     break;
   4285   case NVPTXISD::Suld3DV2I32Trap:
   4286     Opc = NVPTX::SULD_3D_V2I32_TRAP;
   4287     Ops.push_back(TexHandle);
   4288     Ops.push_back(N->getOperand(2));
   4289     Ops.push_back(N->getOperand(3));
   4290     Ops.push_back(N->getOperand(4));
   4291     Ops.push_back(Chain);
   4292     break;
   4293   case NVPTXISD::Suld3DV2I64Trap:
   4294     Opc = NVPTX::SULD_3D_V2I64_TRAP;
   4295     Ops.push_back(TexHandle);
   4296     Ops.push_back(N->getOperand(2));
   4297     Ops.push_back(N->getOperand(3));
   4298     Ops.push_back(N->getOperand(4));
   4299     Ops.push_back(Chain);
   4300     break;
   4301   case NVPTXISD::Suld3DV4I8Trap:
   4302     Opc = NVPTX::SULD_3D_V4I8_TRAP;
   4303     Ops.push_back(TexHandle);
   4304     Ops.push_back(N->getOperand(2));
   4305     Ops.push_back(N->getOperand(3));
   4306     Ops.push_back(N->getOperand(4));
   4307     Ops.push_back(Chain);
   4308     break;
   4309   case NVPTXISD::Suld3DV4I16Trap:
   4310     Opc = NVPTX::SULD_3D_V4I16_TRAP;
   4311     Ops.push_back(TexHandle);
   4312     Ops.push_back(N->getOperand(2));
   4313     Ops.push_back(N->getOperand(3));
   4314     Ops.push_back(N->getOperand(4));
   4315     Ops.push_back(Chain);
   4316     break;
   4317   case NVPTXISD::Suld3DV4I32Trap:
   4318     Opc = NVPTX::SULD_3D_V4I32_TRAP;
   4319     Ops.push_back(TexHandle);
   4320     Ops.push_back(N->getOperand(2));
   4321     Ops.push_back(N->getOperand(3));
   4322     Ops.push_back(N->getOperand(4));
   4323     Ops.push_back(Chain);
   4324     break;
   4325   case NVPTXISD::Suld1DI8Zero:
   4326     Opc = NVPTX::SULD_1D_I8_ZERO;
   4327     Ops.push_back(TexHandle);
   4328     Ops.push_back(N->getOperand(2));
   4329     Ops.push_back(Chain);
   4330     break;
   4331   case NVPTXISD::Suld1DI16Zero:
   4332     Opc = NVPTX::SULD_1D_I16_ZERO;
   4333     Ops.push_back(TexHandle);
   4334     Ops.push_back(N->getOperand(2));
   4335     Ops.push_back(Chain);
   4336     break;
   4337   case NVPTXISD::Suld1DI32Zero:
   4338     Opc = NVPTX::SULD_1D_I32_ZERO;
   4339     Ops.push_back(TexHandle);
   4340     Ops.push_back(N->getOperand(2));
   4341     Ops.push_back(Chain);
   4342     break;
   4343   case NVPTXISD::Suld1DI64Zero:
   4344     Opc = NVPTX::SULD_1D_I64_ZERO;
   4345     Ops.push_back(TexHandle);
   4346     Ops.push_back(N->getOperand(2));
   4347     Ops.push_back(Chain);
   4348     break;
   4349   case NVPTXISD::Suld1DV2I8Zero:
   4350     Opc = NVPTX::SULD_1D_V2I8_ZERO;
   4351     Ops.push_back(TexHandle);
   4352     Ops.push_back(N->getOperand(2));
   4353     Ops.push_back(Chain);
   4354     break;
   4355   case NVPTXISD::Suld1DV2I16Zero:
   4356     Opc = NVPTX::SULD_1D_V2I16_ZERO;
   4357     Ops.push_back(TexHandle);
   4358     Ops.push_back(N->getOperand(2));
   4359     Ops.push_back(Chain);
   4360     break;
   4361   case NVPTXISD::Suld1DV2I32Zero:
   4362     Opc = NVPTX::SULD_1D_V2I32_ZERO;
   4363     Ops.push_back(TexHandle);
   4364     Ops.push_back(N->getOperand(2));
   4365     Ops.push_back(Chain);
   4366     break;
   4367   case NVPTXISD::Suld1DV2I64Zero:
   4368     Opc = NVPTX::SULD_1D_V2I64_ZERO;
   4369     Ops.push_back(TexHandle);
   4370     Ops.push_back(N->getOperand(2));
   4371     Ops.push_back(Chain);
   4372     break;
   4373   case NVPTXISD::Suld1DV4I8Zero:
   4374     Opc = NVPTX::SULD_1D_V4I8_ZERO;
   4375     Ops.push_back(TexHandle);
   4376     Ops.push_back(N->getOperand(2));
   4377     Ops.push_back(Chain);
   4378     break;
   4379   case NVPTXISD::Suld1DV4I16Zero:
   4380     Opc = NVPTX::SULD_1D_V4I16_ZERO;
   4381     Ops.push_back(TexHandle);
   4382     Ops.push_back(N->getOperand(2));
   4383     Ops.push_back(Chain);
   4384     break;
   4385   case NVPTXISD::Suld1DV4I32Zero:
   4386     Opc = NVPTX::SULD_1D_V4I32_ZERO;
   4387     Ops.push_back(TexHandle);
   4388     Ops.push_back(N->getOperand(2));
   4389     Ops.push_back(Chain);
   4390     break;
   4391   case NVPTXISD::Suld1DArrayI8Zero:
   4392     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
   4393     Ops.push_back(TexHandle);
   4394     Ops.push_back(N->getOperand(2));
   4395     Ops.push_back(N->getOperand(3));
   4396     Ops.push_back(Chain);
   4397     break;
   4398   case NVPTXISD::Suld1DArrayI16Zero:
   4399     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
   4400     Ops.push_back(TexHandle);
   4401     Ops.push_back(N->getOperand(2));
   4402     Ops.push_back(N->getOperand(3));
   4403     Ops.push_back(Chain);
   4404     break;
   4405   case NVPTXISD::Suld1DArrayI32Zero:
   4406     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
   4407     Ops.push_back(TexHandle);
   4408     Ops.push_back(N->getOperand(2));
   4409     Ops.push_back(N->getOperand(3));
   4410     Ops.push_back(Chain);
   4411     break;
   4412   case NVPTXISD::Suld1DArrayI64Zero:
   4413     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
   4414     Ops.push_back(TexHandle);
   4415     Ops.push_back(N->getOperand(2));
   4416     Ops.push_back(N->getOperand(3));
   4417     Ops.push_back(Chain);
   4418     break;
   4419   case NVPTXISD::Suld1DArrayV2I8Zero:
   4420     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
   4421     Ops.push_back(TexHandle);
   4422     Ops.push_back(N->getOperand(2));
   4423     Ops.push_back(N->getOperand(3));
   4424     Ops.push_back(Chain);
   4425     break;
   4426   case NVPTXISD::Suld1DArrayV2I16Zero:
   4427     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
   4428     Ops.push_back(TexHandle);
   4429     Ops.push_back(N->getOperand(2));
   4430     Ops.push_back(N->getOperand(3));
   4431     Ops.push_back(Chain);
   4432     break;
   4433   case NVPTXISD::Suld1DArrayV2I32Zero:
   4434     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
   4435     Ops.push_back(TexHandle);
   4436     Ops.push_back(N->getOperand(2));
   4437     Ops.push_back(N->getOperand(3));
   4438     Ops.push_back(Chain);
   4439     break;
   4440   case NVPTXISD::Suld1DArrayV2I64Zero:
   4441     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
   4442     Ops.push_back(TexHandle);
   4443     Ops.push_back(N->getOperand(2));
   4444     Ops.push_back(N->getOperand(3));
   4445     Ops.push_back(Chain);
   4446     break;
   4447   case NVPTXISD::Suld1DArrayV4I8Zero:
   4448     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
   4449     Ops.push_back(TexHandle);
   4450     Ops.push_back(N->getOperand(2));
   4451     Ops.push_back(N->getOperand(3));
   4452     Ops.push_back(Chain);
   4453     break;
   4454   case NVPTXISD::Suld1DArrayV4I16Zero:
   4455     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
   4456     Ops.push_back(TexHandle);
   4457     Ops.push_back(N->getOperand(2));
   4458     Ops.push_back(N->getOperand(3));
   4459     Ops.push_back(Chain);
   4460     break;
   4461   case NVPTXISD::Suld1DArrayV4I32Zero:
   4462     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
   4463     Ops.push_back(TexHandle);
   4464     Ops.push_back(N->getOperand(2));
   4465     Ops.push_back(N->getOperand(3));
   4466     Ops.push_back(Chain);
   4467     break;
   4468   case NVPTXISD::Suld2DI8Zero:
   4469     Opc = NVPTX::SULD_2D_I8_ZERO;
   4470     Ops.push_back(TexHandle);
   4471     Ops.push_back(N->getOperand(2));
   4472     Ops.push_back(N->getOperand(3));
   4473     Ops.push_back(Chain);
   4474     break;
   4475   case NVPTXISD::Suld2DI16Zero:
   4476     Opc = NVPTX::SULD_2D_I16_ZERO;
   4477     Ops.push_back(TexHandle);
   4478     Ops.push_back(N->getOperand(2));
   4479     Ops.push_back(N->getOperand(3));
   4480     Ops.push_back(Chain);
   4481     break;
   4482   case NVPTXISD::Suld2DI32Zero:
   4483     Opc = NVPTX::SULD_2D_I32_ZERO;
   4484     Ops.push_back(TexHandle);
   4485     Ops.push_back(N->getOperand(2));
   4486     Ops.push_back(N->getOperand(3));
   4487     Ops.push_back(Chain);
   4488     break;
   4489   case NVPTXISD::Suld2DI64Zero:
   4490     Opc = NVPTX::SULD_2D_I64_ZERO;
   4491     Ops.push_back(TexHandle);
   4492     Ops.push_back(N->getOperand(2));
   4493     Ops.push_back(N->getOperand(3));
   4494     Ops.push_back(Chain);
   4495     break;
   4496   case NVPTXISD::Suld2DV2I8Zero:
   4497     Opc = NVPTX::SULD_2D_V2I8_ZERO;
   4498     Ops.push_back(TexHandle);
   4499     Ops.push_back(N->getOperand(2));
   4500     Ops.push_back(N->getOperand(3));
   4501     Ops.push_back(Chain);
   4502     break;
   4503   case NVPTXISD::Suld2DV2I16Zero:
   4504     Opc = NVPTX::SULD_2D_V2I16_ZERO;
   4505     Ops.push_back(TexHandle);
   4506     Ops.push_back(N->getOperand(2));
   4507     Ops.push_back(N->getOperand(3));
   4508     Ops.push_back(Chain);
   4509     break;
   4510   case NVPTXISD::Suld2DV2I32Zero:
   4511     Opc = NVPTX::SULD_2D_V2I32_ZERO;
   4512     Ops.push_back(TexHandle);
   4513     Ops.push_back(N->getOperand(2));
   4514     Ops.push_back(N->getOperand(3));
   4515     Ops.push_back(Chain);
   4516     break;
   4517   case NVPTXISD::Suld2DV2I64Zero:
   4518     Opc = NVPTX::SULD_2D_V2I64_ZERO;
   4519     Ops.push_back(TexHandle);
   4520     Ops.push_back(N->getOperand(2));
   4521     Ops.push_back(N->getOperand(3));
   4522     Ops.push_back(Chain);
   4523     break;
   4524   case NVPTXISD::Suld2DV4I8Zero:
   4525     Opc = NVPTX::SULD_2D_V4I8_ZERO;
   4526     Ops.push_back(TexHandle);
   4527     Ops.push_back(N->getOperand(2));
   4528     Ops.push_back(N->getOperand(3));
   4529     Ops.push_back(Chain);
   4530     break;
   4531   case NVPTXISD::Suld2DV4I16Zero:
   4532     Opc = NVPTX::SULD_2D_V4I16_ZERO;
   4533     Ops.push_back(TexHandle);
   4534     Ops.push_back(N->getOperand(2));
   4535     Ops.push_back(N->getOperand(3));
   4536     Ops.push_back(Chain);
   4537     break;
   4538   case NVPTXISD::Suld2DV4I32Zero:
   4539     Opc = NVPTX::SULD_2D_V4I32_ZERO;
   4540     Ops.push_back(TexHandle);
   4541     Ops.push_back(N->getOperand(2));
   4542     Ops.push_back(N->getOperand(3));
   4543     Ops.push_back(Chain);
   4544     break;
   4545   case NVPTXISD::Suld2DArrayI8Zero:
   4546     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
   4547     Ops.push_back(TexHandle);
   4548     Ops.push_back(N->getOperand(2));
   4549     Ops.push_back(N->getOperand(3));
   4550     Ops.push_back(N->getOperand(4));
   4551     Ops.push_back(Chain);
   4552     break;
   4553   case NVPTXISD::Suld2DArrayI16Zero:
   4554     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
   4555     Ops.push_back(TexHandle);
   4556     Ops.push_back(N->getOperand(2));
   4557     Ops.push_back(N->getOperand(3));
   4558     Ops.push_back(N->getOperand(4));
   4559     Ops.push_back(Chain);
   4560     break;
   4561   case NVPTXISD::Suld2DArrayI32Zero:
   4562     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
   4563     Ops.push_back(TexHandle);
   4564     Ops.push_back(N->getOperand(2));
   4565     Ops.push_back(N->getOperand(3));
   4566     Ops.push_back(N->getOperand(4));
   4567     Ops.push_back(Chain);
   4568     break;
   4569   case NVPTXISD::Suld2DArrayI64Zero:
   4570     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
   4571     Ops.push_back(TexHandle);
   4572     Ops.push_back(N->getOperand(2));
   4573     Ops.push_back(N->getOperand(3));
   4574     Ops.push_back(N->getOperand(4));
   4575     Ops.push_back(Chain);
   4576     break;
   4577   case NVPTXISD::Suld2DArrayV2I8Zero:
   4578     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
   4579     Ops.push_back(TexHandle);
   4580     Ops.push_back(N->getOperand(2));
   4581     Ops.push_back(N->getOperand(3));
   4582     Ops.push_back(N->getOperand(4));
   4583     Ops.push_back(Chain);
   4584     break;
   4585   case NVPTXISD::Suld2DArrayV2I16Zero:
   4586     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
   4587     Ops.push_back(TexHandle);
   4588     Ops.push_back(N->getOperand(2));
   4589     Ops.push_back(N->getOperand(3));
   4590     Ops.push_back(N->getOperand(4));
   4591     Ops.push_back(Chain);
   4592     break;
   4593   case NVPTXISD::Suld2DArrayV2I32Zero:
   4594     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
   4595     Ops.push_back(TexHandle);
   4596     Ops.push_back(N->getOperand(2));
   4597     Ops.push_back(N->getOperand(3));
   4598     Ops.push_back(N->getOperand(4));
   4599     Ops.push_back(Chain);
   4600     break;
   4601   case NVPTXISD::Suld2DArrayV2I64Zero:
   4602     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
   4603     Ops.push_back(TexHandle);
   4604     Ops.push_back(N->getOperand(2));
   4605     Ops.push_back(N->getOperand(3));
   4606     Ops.push_back(N->getOperand(4));
   4607     Ops.push_back(Chain);
   4608     break;
   4609   case NVPTXISD::Suld2DArrayV4I8Zero:
   4610     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
   4611     Ops.push_back(TexHandle);
   4612     Ops.push_back(N->getOperand(2));
   4613     Ops.push_back(N->getOperand(3));
   4614     Ops.push_back(N->getOperand(4));
   4615     Ops.push_back(Chain);
   4616     break;
   4617   case NVPTXISD::Suld2DArrayV4I16Zero:
   4618     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
   4619     Ops.push_back(TexHandle);
   4620     Ops.push_back(N->getOperand(2));
   4621     Ops.push_back(N->getOperand(3));
   4622     Ops.push_back(N->getOperand(4));
   4623     Ops.push_back(Chain);
   4624     break;
   4625   case NVPTXISD::Suld2DArrayV4I32Zero:
   4626     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
   4627     Ops.push_back(TexHandle);
   4628     Ops.push_back(N->getOperand(2));
   4629     Ops.push_back(N->getOperand(3));
   4630     Ops.push_back(N->getOperand(4));
   4631     Ops.push_back(Chain);
   4632     break;
   4633   case NVPTXISD::Suld3DI8Zero:
   4634     Opc = NVPTX::SULD_3D_I8_ZERO;
   4635     Ops.push_back(TexHandle);
   4636     Ops.push_back(N->getOperand(2));
   4637     Ops.push_back(N->getOperand(3));
   4638     Ops.push_back(N->getOperand(4));
   4639     Ops.push_back(Chain);
   4640     break;
   4641   case NVPTXISD::Suld3DI16Zero:
   4642     Opc = NVPTX::SULD_3D_I16_ZERO;
   4643     Ops.push_back(TexHandle);
   4644     Ops.push_back(N->getOperand(2));
   4645     Ops.push_back(N->getOperand(3));
   4646     Ops.push_back(N->getOperand(4));
   4647     Ops.push_back(Chain);
   4648     break;
   4649   case NVPTXISD::Suld3DI32Zero:
   4650     Opc = NVPTX::SULD_3D_I32_ZERO;
   4651     Ops.push_back(TexHandle);
   4652     Ops.push_back(N->getOperand(2));
   4653     Ops.push_back(N->getOperand(3));
   4654     Ops.push_back(N->getOperand(4));
   4655     Ops.push_back(Chain);
   4656     break;
   4657   case NVPTXISD::Suld3DI64Zero:
   4658     Opc = NVPTX::SULD_3D_I64_ZERO;
   4659     Ops.push_back(TexHandle);
   4660     Ops.push_back(N->getOperand(2));
   4661     Ops.push_back(N->getOperand(3));
   4662     Ops.push_back(N->getOperand(4));
   4663     Ops.push_back(Chain);
   4664     break;
   4665   case NVPTXISD::Suld3DV2I8Zero:
   4666     Opc = NVPTX::SULD_3D_V2I8_ZERO;
   4667     Ops.push_back(TexHandle);
   4668     Ops.push_back(N->getOperand(2));
   4669     Ops.push_back(N->getOperand(3));
   4670     Ops.push_back(N->getOperand(4));
   4671     Ops.push_back(Chain);
   4672     break;
   4673   case NVPTXISD::Suld3DV2I16Zero:
   4674     Opc = NVPTX::SULD_3D_V2I16_ZERO;
   4675     Ops.push_back(TexHandle);
   4676     Ops.push_back(N->getOperand(2));
   4677     Ops.push_back(N->getOperand(3));
   4678     Ops.push_back(N->getOperand(4));
   4679     Ops.push_back(Chain);
   4680     break;
   4681   case NVPTXISD::Suld3DV2I32Zero:
   4682     Opc = NVPTX::SULD_3D_V2I32_ZERO;
   4683     Ops.push_back(TexHandle);
   4684     Ops.push_back(N->getOperand(2));
   4685     Ops.push_back(N->getOperand(3));
   4686     Ops.push_back(N->getOperand(4));
   4687     Ops.push_back(Chain);
   4688     break;
   4689   case NVPTXISD::Suld3DV2I64Zero:
   4690     Opc = NVPTX::SULD_3D_V2I64_ZERO;
   4691     Ops.push_back(TexHandle);
   4692     Ops.push_back(N->getOperand(2));
   4693     Ops.push_back(N->getOperand(3));
   4694     Ops.push_back(N->getOperand(4));
   4695     Ops.push_back(Chain);
   4696     break;
   4697   case NVPTXISD::Suld3DV4I8Zero:
   4698     Opc = NVPTX::SULD_3D_V4I8_ZERO;
   4699     Ops.push_back(TexHandle);
   4700     Ops.push_back(N->getOperand(2));
   4701     Ops.push_back(N->getOperand(3));
   4702     Ops.push_back(N->getOperand(4));
   4703     Ops.push_back(Chain);
   4704     break;
   4705   case NVPTXISD::Suld3DV4I16Zero:
   4706     Opc = NVPTX::SULD_3D_V4I16_ZERO;
   4707     Ops.push_back(TexHandle);
   4708     Ops.push_back(N->getOperand(2));
   4709     Ops.push_back(N->getOperand(3));
   4710     Ops.push_back(N->getOperand(4));
   4711     Ops.push_back(Chain);
   4712     break;
   4713   case NVPTXISD::Suld3DV4I32Zero:
   4714     Opc = NVPTX::SULD_3D_V4I32_ZERO;
   4715     Ops.push_back(TexHandle);
   4716     Ops.push_back(N->getOperand(2));
   4717     Ops.push_back(N->getOperand(3));
   4718     Ops.push_back(N->getOperand(4));
   4719     Ops.push_back(Chain);
   4720     break;
   4721   }
   4722   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   4723   return Ret;
   4724 }
   4725 
   4726 
   4727 /// SelectBFE - Look for instruction sequences that can be made more efficient
   4728 /// by using the 'bfe' (bit-field extract) PTX instruction
   4729 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
   4730   SDValue LHS = N->getOperand(0);
   4731   SDValue RHS = N->getOperand(1);
   4732   SDValue Len;
   4733   SDValue Start;
   4734   SDValue Val;
   4735   bool IsSigned = false;
   4736 
   4737   if (N->getOpcode() == ISD::AND) {
   4738     // Canonicalize the operands
   4739     // We want 'and %val, %mask'
   4740     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
   4741       std::swap(LHS, RHS);
   4742     }
   4743 
   4744     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
   4745     if (!Mask) {
   4746       // We need a constant mask on the RHS of the AND
   4747       return NULL;
   4748     }
   4749 
   4750     // Extract the mask bits
   4751     uint64_t MaskVal = Mask->getZExtValue();
   4752     if (!isMask_64(MaskVal)) {
   4753       // We *could* handle shifted masks here, but doing so would require an
   4754       // 'and' operation to fix up the low-order bits so we would trade
   4755       // shr+and for bfe+and, which has the same throughput
   4756       return NULL;
   4757     }
   4758 
   4759     // How many bits are in our mask?
   4760     uint64_t NumBits = countTrailingOnes(MaskVal);
   4761     Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
   4762 
   4763     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
   4764       // We have a 'srl/and' pair, extract the effective start bit and length
   4765       Val = LHS.getNode()->getOperand(0);
   4766       Start = LHS.getNode()->getOperand(1);
   4767       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
   4768       if (StartConst) {
   4769         uint64_t StartVal = StartConst->getZExtValue();
   4770         // How many "good" bits do we have left?  "good" is defined here as bits
   4771         // that exist in the original value, not shifted in.
   4772         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
   4773         if (NumBits > GoodBits) {
   4774           // Do not handle the case where bits have been shifted in. In theory
   4775           // we could handle this, but the cost is likely higher than just
   4776           // emitting the srl/and pair.
   4777           return NULL;
   4778         }
   4779         Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
   4780       } else {
   4781         // Do not handle the case where the shift amount (can be zero if no srl
   4782         // was found) is not constant. We could handle this case, but it would
   4783         // require run-time logic that would be more expensive than just
   4784         // emitting the srl/and pair.
   4785         return NULL;
   4786       }
   4787     } else {
   4788       // Do not handle the case where the LHS of the and is not a shift. While
   4789       // it would be trivial to handle this case, it would just transform
   4790       // 'and' -> 'bfe', but 'and' has higher-throughput.
   4791       return NULL;
   4792     }
   4793   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
   4794     if (LHS->getOpcode() == ISD::AND) {
   4795       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
   4796       if (!ShiftCnst) {
   4797         // Shift amount must be constant
   4798         return NULL;
   4799       }
   4800 
   4801       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
   4802 
   4803       SDValue AndLHS = LHS->getOperand(0);
   4804       SDValue AndRHS = LHS->getOperand(1);
   4805 
   4806       // Canonicalize the AND to have the mask on the RHS
   4807       if (isa<ConstantSDNode>(AndLHS)) {
   4808         std::swap(AndLHS, AndRHS);
   4809       }
   4810 
   4811       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
   4812       if (!MaskCnst) {
   4813         // Mask must be constant
   4814         return NULL;
   4815       }
   4816 
   4817       uint64_t MaskVal = MaskCnst->getZExtValue();
   4818       uint64_t NumZeros;
   4819       uint64_t NumBits;
   4820       if (isMask_64(MaskVal)) {
   4821         NumZeros = 0;
   4822         // The number of bits in the result bitfield will be the number of
   4823         // trailing ones (the AND) minus the number of bits we shift off
   4824         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
   4825       } else if (isShiftedMask_64(MaskVal)) {
   4826         NumZeros = countTrailingZeros(MaskVal);
   4827         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
   4828         // The number of bits in the result bitfield will be the number of
   4829         // trailing zeros plus the number of set bits in the mask minus the
   4830         // number of bits we shift off
   4831         NumBits = NumZeros + NumOnes - ShiftAmt;
   4832       } else {
   4833         // This is not a mask we can handle
   4834         return NULL;
   4835       }
   4836 
   4837       if (ShiftAmt < NumZeros) {
   4838         // Handling this case would require extra logic that would make this
   4839         // transformation non-profitable
   4840         return NULL;
   4841       }
   4842 
   4843       Val = AndLHS;
   4844       Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
   4845       Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
   4846     } else if (LHS->getOpcode() == ISD::SHL) {
   4847       // Here, we have a pattern like:
   4848       //
   4849       // (sra (shl val, NN), MM)
   4850       // or
   4851       // (srl (shl val, NN), MM)
   4852       //
   4853       // If MM >= NN, we can efficiently optimize this with bfe
   4854       Val = LHS->getOperand(0);
   4855 
   4856       SDValue ShlRHS = LHS->getOperand(1);
   4857       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
   4858       if (!ShlCnst) {
   4859         // Shift amount must be constant
   4860         return NULL;
   4861       }
   4862       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
   4863 
   4864       SDValue ShrRHS = RHS;
   4865       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
   4866       if (!ShrCnst) {
   4867         // Shift amount must be constant
   4868         return NULL;
   4869       }
   4870       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
   4871 
   4872       // To avoid extra codegen and be profitable, we need Outer >= Inner
   4873       if (OuterShiftAmt < InnerShiftAmt) {
   4874         return NULL;
   4875       }
   4876 
   4877       // If the outer shift is more than the type size, we have no bitfield to
   4878       // extract (since we also check that the inner shift is <= the outer shift
   4879       // then this also implies that the inner shift is < the type size)
   4880       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
   4881         return NULL;
   4882       }
   4883 
   4884       Start =
   4885         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
   4886       Len =
   4887         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
   4888                                   OuterShiftAmt, MVT::i32);
   4889 
   4890       if (N->getOpcode() == ISD::SRA) {
   4891         // If we have a arithmetic right shift, we need to use the signed bfe
   4892         // variant
   4893         IsSigned = true;
   4894       }
   4895     } else {
   4896       // No can do...
   4897       return NULL;
   4898     }
   4899   } else {
   4900     // No can do...
   4901     return NULL;
   4902   }
   4903 
   4904 
   4905   unsigned Opc;
   4906   // For the BFE operations we form here from "and" and "srl", always use the
   4907   // unsigned variants.
   4908   if (Val.getValueType() == MVT::i32) {
   4909     if (IsSigned) {
   4910       Opc = NVPTX::BFE_S32rii;
   4911     } else {
   4912       Opc = NVPTX::BFE_U32rii;
   4913     }
   4914   } else if (Val.getValueType() == MVT::i64) {
   4915     if (IsSigned) {
   4916       Opc = NVPTX::BFE_S64rii;
   4917     } else {
   4918       Opc = NVPTX::BFE_U64rii;
   4919     }
   4920   } else {
   4921     // We cannot handle this type
   4922     return NULL;
   4923   }
   4924 
   4925   SDValue Ops[] = {
   4926     Val, Start, Len
   4927   };
   4928 
   4929   SDNode *Ret =
   4930     CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   4931 
   4932   return Ret;
   4933 }
   4934 
   4935 // SelectDirectAddr - Match a direct address for DAG.
   4936 // A direct address could be a globaladdress or externalsymbol.
   4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   4938   // Return true if TGA or ES.
   4939   if (N.getOpcode() == ISD::TargetGlobalAddress ||
   4940       N.getOpcode() == ISD::TargetExternalSymbol) {
   4941     Address = N;
   4942     return true;
   4943   }
   4944   if (N.getOpcode() == NVPTXISD::Wrapper) {
   4945     Address = N.getOperand(0);
   4946     return true;
   4947   }
   4948   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
   4949     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
   4950     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
   4951       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
   4952         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
   4953   }
   4954   return false;
   4955 }
   4956 
   4957 // symbol+offset
   4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
   4959     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   4960   if (Addr.getOpcode() == ISD::ADD) {
   4961     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   4962       SDValue base = Addr.getOperand(0);
   4963       if (SelectDirectAddr(base, Base)) {
   4964         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   4965         return true;
   4966       }
   4967     }
   4968   }
   4969   return false;
   4970 }
   4971 
   4972 // symbol+offset
   4973 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
   4974                                      SDValue &Base, SDValue &Offset) {
   4975   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
   4976 }
   4977 
   4978 // symbol+offset
   4979 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
   4980                                        SDValue &Base, SDValue &Offset) {
   4981   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
   4982 }
   4983 
   4984 // register+offset
   4985 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
   4986     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   4987   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
   4988     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   4989     Offset = CurDAG->getTargetConstant(0, mvt);
   4990     return true;
   4991   }
   4992   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
   4993       Addr.getOpcode() == ISD::TargetGlobalAddress)
   4994     return false; // direct calls.
   4995 
   4996   if (Addr.getOpcode() == ISD::ADD) {
   4997     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
   4998       return false;
   4999     }
   5000     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   5001       if (FrameIndexSDNode *FIN =
   5002               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
   5003         // Constant offset from frame ref.
   5004         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   5005       else
   5006         Base = Addr.getOperand(0);
   5007       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   5008       return true;
   5009     }
   5010   }
   5011   return false;
   5012 }
   5013 
   5014 // register+offset
   5015 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
   5016                                      SDValue &Base, SDValue &Offset) {
   5017   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
   5018 }
   5019 
   5020 // register+offset
   5021 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   5022                                        SDValue &Base, SDValue &Offset) {
   5023   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
   5024 }
   5025 
   5026 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   5027                                                  unsigned int spN) const {
   5028   const Value *Src = nullptr;
   5029   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
   5030     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
   5031       return true;
   5032     Src = mN->getMemOperand()->getValue();
   5033   }
   5034   if (!Src)
   5035     return false;
   5036   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
   5037     return (PT->getAddressSpace() == spN);
   5038   return false;
   5039 }
   5040 
   5041 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   5042 /// inline asm expressions.
   5043 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   5044     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
   5045   SDValue Op0, Op1;
   5046   switch (ConstraintID) {
   5047   default:
   5048     return true;
   5049   case InlineAsm::Constraint_m: // memory
   5050     if (SelectDirectAddr(Op, Op0)) {
   5051       OutOps.push_back(Op0);
   5052       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
   5053       return false;
   5054     }
   5055     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
   5056       OutOps.push_back(Op0);
   5057       OutOps.push_back(Op1);
   5058       return false;
   5059     }
   5060     break;
   5061   }
   5062   return true;
   5063 }
   5064