Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "NVPTXISelDAGToDAG.h"
     15 #include "NVPTXUtilities.h"
     16 #include "llvm/Analysis/ValueTracking.h"
     17 #include "llvm/IR/GlobalValue.h"
     18 #include "llvm/IR/Instructions.h"
     19 #include "llvm/Support/CommandLine.h"
     20 #include "llvm/Support/Debug.h"
     21 #include "llvm/Support/ErrorHandling.h"
     22 #include "llvm/Support/raw_ostream.h"
     23 #include "llvm/Target/TargetIntrinsicInfo.h"
     24 
     25 using namespace llvm;
     26 
     27 #define DEBUG_TYPE "nvptx-isel"
     28 
     29 static cl::opt<int> UsePrecDivF32(
     30     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
     31     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     32              " IEEE Compliant F32 div.rnd if available."),
     33     cl::init(2));
     34 
     35 static cl::opt<bool>
     36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
     37           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
     38           cl::init(true));
     39 
     40 static cl::opt<bool>
     41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
     42            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
     43            cl::init(false));
     44 
     45 
     46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     47 /// NVPTX-specific DAG, ready for instruction scheduling.
     48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     49                                        llvm::CodeGenOpt::Level OptLevel) {
     50   return new NVPTXDAGToDAGISel(TM, OptLevel);
     51 }
     52 
     53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     54                                      CodeGenOpt::Level OptLevel)
     55     : SelectionDAGISel(tm, OptLevel), TM(tm) {
     56   doMulWide = (OptLevel > 0);
     57 }
     58 
     59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
     60     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
     61     return SelectionDAGISel::runOnMachineFunction(MF);
     62 }
     63 
     64 int NVPTXDAGToDAGISel::getDivF32Level() const {
     65   if (UsePrecDivF32.getNumOccurrences() > 0) {
     66     // If nvptx-prec-div32=N is used on the command-line, always honor it
     67     return UsePrecDivF32;
     68   } else {
     69     // Otherwise, use div.approx if fast math is enabled
     70     if (TM.Options.UnsafeFPMath)
     71       return 0;
     72     else
     73       return 2;
     74   }
     75 }
     76 
     77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
     78   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
     79     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
     80     return UsePrecSqrtF32;
     81   } else {
     82     // Otherwise, use sqrt.approx if fast math is enabled
     83     return !TM.Options.UnsafeFPMath;
     84   }
     85 }
     86 
     87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
     88   if (FtzEnabled.getNumOccurrences() > 0) {
     89     // If nvptx-f32ftz is used on the command-line, always honor it
     90     return FtzEnabled;
     91   } else {
     92     const Function *F = MF->getFunction();
     93     // Otherwise, check for an nvptx-f32ftz attribute on the function
     94     if (F->hasFnAttribute("nvptx-f32ftz"))
     95       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
     96     else
     97       return false;
     98   }
     99 }
    100 
    101 bool NVPTXDAGToDAGISel::allowFMA() const {
    102   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
    103   return TL->allowFMA(*MF, OptLevel);
    104 }
    105 
    106 /// Select - Select instructions not customized! Used for
    107 /// expanded, promoted and normal instructions.
    108 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
    109 
    110   if (N->isMachineOpcode()) {
    111     N->setNodeId(-1);
    112     return nullptr; // Already selected.
    113   }
    114 
    115   SDNode *ResNode = nullptr;
    116   switch (N->getOpcode()) {
    117   case ISD::LOAD:
    118     ResNode = SelectLoad(N);
    119     break;
    120   case ISD::STORE:
    121     ResNode = SelectStore(N);
    122     break;
    123   case NVPTXISD::LoadV2:
    124   case NVPTXISD::LoadV4:
    125     ResNode = SelectLoadVector(N);
    126     break;
    127   case NVPTXISD::LDGV2:
    128   case NVPTXISD::LDGV4:
    129   case NVPTXISD::LDUV2:
    130   case NVPTXISD::LDUV4:
    131     ResNode = SelectLDGLDU(N);
    132     break;
    133   case NVPTXISD::StoreV2:
    134   case NVPTXISD::StoreV4:
    135     ResNode = SelectStoreVector(N);
    136     break;
    137   case NVPTXISD::LoadParam:
    138   case NVPTXISD::LoadParamV2:
    139   case NVPTXISD::LoadParamV4:
    140     ResNode = SelectLoadParam(N);
    141     break;
    142   case NVPTXISD::StoreRetval:
    143   case NVPTXISD::StoreRetvalV2:
    144   case NVPTXISD::StoreRetvalV4:
    145     ResNode = SelectStoreRetval(N);
    146     break;
    147   case NVPTXISD::StoreParam:
    148   case NVPTXISD::StoreParamV2:
    149   case NVPTXISD::StoreParamV4:
    150   case NVPTXISD::StoreParamS32:
    151   case NVPTXISD::StoreParamU32:
    152     ResNode = SelectStoreParam(N);
    153     break;
    154   case ISD::INTRINSIC_WO_CHAIN:
    155     ResNode = SelectIntrinsicNoChain(N);
    156     break;
    157   case ISD::INTRINSIC_W_CHAIN:
    158     ResNode = SelectIntrinsicChain(N);
    159     break;
    160   case NVPTXISD::Tex1DFloatS32:
    161   case NVPTXISD::Tex1DFloatFloat:
    162   case NVPTXISD::Tex1DFloatFloatLevel:
    163   case NVPTXISD::Tex1DFloatFloatGrad:
    164   case NVPTXISD::Tex1DS32S32:
    165   case NVPTXISD::Tex1DS32Float:
    166   case NVPTXISD::Tex1DS32FloatLevel:
    167   case NVPTXISD::Tex1DS32FloatGrad:
    168   case NVPTXISD::Tex1DU32S32:
    169   case NVPTXISD::Tex1DU32Float:
    170   case NVPTXISD::Tex1DU32FloatLevel:
    171   case NVPTXISD::Tex1DU32FloatGrad:
    172   case NVPTXISD::Tex1DArrayFloatS32:
    173   case NVPTXISD::Tex1DArrayFloatFloat:
    174   case NVPTXISD::Tex1DArrayFloatFloatLevel:
    175   case NVPTXISD::Tex1DArrayFloatFloatGrad:
    176   case NVPTXISD::Tex1DArrayS32S32:
    177   case NVPTXISD::Tex1DArrayS32Float:
    178   case NVPTXISD::Tex1DArrayS32FloatLevel:
    179   case NVPTXISD::Tex1DArrayS32FloatGrad:
    180   case NVPTXISD::Tex1DArrayU32S32:
    181   case NVPTXISD::Tex1DArrayU32Float:
    182   case NVPTXISD::Tex1DArrayU32FloatLevel:
    183   case NVPTXISD::Tex1DArrayU32FloatGrad:
    184   case NVPTXISD::Tex2DFloatS32:
    185   case NVPTXISD::Tex2DFloatFloat:
    186   case NVPTXISD::Tex2DFloatFloatLevel:
    187   case NVPTXISD::Tex2DFloatFloatGrad:
    188   case NVPTXISD::Tex2DS32S32:
    189   case NVPTXISD::Tex2DS32Float:
    190   case NVPTXISD::Tex2DS32FloatLevel:
    191   case NVPTXISD::Tex2DS32FloatGrad:
    192   case NVPTXISD::Tex2DU32S32:
    193   case NVPTXISD::Tex2DU32Float:
    194   case NVPTXISD::Tex2DU32FloatLevel:
    195   case NVPTXISD::Tex2DU32FloatGrad:
    196   case NVPTXISD::Tex2DArrayFloatS32:
    197   case NVPTXISD::Tex2DArrayFloatFloat:
    198   case NVPTXISD::Tex2DArrayFloatFloatLevel:
    199   case NVPTXISD::Tex2DArrayFloatFloatGrad:
    200   case NVPTXISD::Tex2DArrayS32S32:
    201   case NVPTXISD::Tex2DArrayS32Float:
    202   case NVPTXISD::Tex2DArrayS32FloatLevel:
    203   case NVPTXISD::Tex2DArrayS32FloatGrad:
    204   case NVPTXISD::Tex2DArrayU32S32:
    205   case NVPTXISD::Tex2DArrayU32Float:
    206   case NVPTXISD::Tex2DArrayU32FloatLevel:
    207   case NVPTXISD::Tex2DArrayU32FloatGrad:
    208   case NVPTXISD::Tex3DFloatS32:
    209   case NVPTXISD::Tex3DFloatFloat:
    210   case NVPTXISD::Tex3DFloatFloatLevel:
    211   case NVPTXISD::Tex3DFloatFloatGrad:
    212   case NVPTXISD::Tex3DS32S32:
    213   case NVPTXISD::Tex3DS32Float:
    214   case NVPTXISD::Tex3DS32FloatLevel:
    215   case NVPTXISD::Tex3DS32FloatGrad:
    216   case NVPTXISD::Tex3DU32S32:
    217   case NVPTXISD::Tex3DU32Float:
    218   case NVPTXISD::Tex3DU32FloatLevel:
    219   case NVPTXISD::Tex3DU32FloatGrad:
    220   case NVPTXISD::TexCubeFloatFloat:
    221   case NVPTXISD::TexCubeFloatFloatLevel:
    222   case NVPTXISD::TexCubeS32Float:
    223   case NVPTXISD::TexCubeS32FloatLevel:
    224   case NVPTXISD::TexCubeU32Float:
    225   case NVPTXISD::TexCubeU32FloatLevel:
    226   case NVPTXISD::TexCubeArrayFloatFloat:
    227   case NVPTXISD::TexCubeArrayFloatFloatLevel:
    228   case NVPTXISD::TexCubeArrayS32Float:
    229   case NVPTXISD::TexCubeArrayS32FloatLevel:
    230   case NVPTXISD::TexCubeArrayU32Float:
    231   case NVPTXISD::TexCubeArrayU32FloatLevel:
    232   case NVPTXISD::Tld4R2DFloatFloat:
    233   case NVPTXISD::Tld4G2DFloatFloat:
    234   case NVPTXISD::Tld4B2DFloatFloat:
    235   case NVPTXISD::Tld4A2DFloatFloat:
    236   case NVPTXISD::Tld4R2DS64Float:
    237   case NVPTXISD::Tld4G2DS64Float:
    238   case NVPTXISD::Tld4B2DS64Float:
    239   case NVPTXISD::Tld4A2DS64Float:
    240   case NVPTXISD::Tld4R2DU64Float:
    241   case NVPTXISD::Tld4G2DU64Float:
    242   case NVPTXISD::Tld4B2DU64Float:
    243   case NVPTXISD::Tld4A2DU64Float:
    244   case NVPTXISD::TexUnified1DFloatS32:
    245   case NVPTXISD::TexUnified1DFloatFloat:
    246   case NVPTXISD::TexUnified1DFloatFloatLevel:
    247   case NVPTXISD::TexUnified1DFloatFloatGrad:
    248   case NVPTXISD::TexUnified1DS32S32:
    249   case NVPTXISD::TexUnified1DS32Float:
    250   case NVPTXISD::TexUnified1DS32FloatLevel:
    251   case NVPTXISD::TexUnified1DS32FloatGrad:
    252   case NVPTXISD::TexUnified1DU32S32:
    253   case NVPTXISD::TexUnified1DU32Float:
    254   case NVPTXISD::TexUnified1DU32FloatLevel:
    255   case NVPTXISD::TexUnified1DU32FloatGrad:
    256   case NVPTXISD::TexUnified1DArrayFloatS32:
    257   case NVPTXISD::TexUnified1DArrayFloatFloat:
    258   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    259   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    260   case NVPTXISD::TexUnified1DArrayS32S32:
    261   case NVPTXISD::TexUnified1DArrayS32Float:
    262   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    263   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    264   case NVPTXISD::TexUnified1DArrayU32S32:
    265   case NVPTXISD::TexUnified1DArrayU32Float:
    266   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    267   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    268   case NVPTXISD::TexUnified2DFloatS32:
    269   case NVPTXISD::TexUnified2DFloatFloat:
    270   case NVPTXISD::TexUnified2DFloatFloatLevel:
    271   case NVPTXISD::TexUnified2DFloatFloatGrad:
    272   case NVPTXISD::TexUnified2DS32S32:
    273   case NVPTXISD::TexUnified2DS32Float:
    274   case NVPTXISD::TexUnified2DS32FloatLevel:
    275   case NVPTXISD::TexUnified2DS32FloatGrad:
    276   case NVPTXISD::TexUnified2DU32S32:
    277   case NVPTXISD::TexUnified2DU32Float:
    278   case NVPTXISD::TexUnified2DU32FloatLevel:
    279   case NVPTXISD::TexUnified2DU32FloatGrad:
    280   case NVPTXISD::TexUnified2DArrayFloatS32:
    281   case NVPTXISD::TexUnified2DArrayFloatFloat:
    282   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    283   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    284   case NVPTXISD::TexUnified2DArrayS32S32:
    285   case NVPTXISD::TexUnified2DArrayS32Float:
    286   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    287   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    288   case NVPTXISD::TexUnified2DArrayU32S32:
    289   case NVPTXISD::TexUnified2DArrayU32Float:
    290   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    291   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    292   case NVPTXISD::TexUnified3DFloatS32:
    293   case NVPTXISD::TexUnified3DFloatFloat:
    294   case NVPTXISD::TexUnified3DFloatFloatLevel:
    295   case NVPTXISD::TexUnified3DFloatFloatGrad:
    296   case NVPTXISD::TexUnified3DS32S32:
    297   case NVPTXISD::TexUnified3DS32Float:
    298   case NVPTXISD::TexUnified3DS32FloatLevel:
    299   case NVPTXISD::TexUnified3DS32FloatGrad:
    300   case NVPTXISD::TexUnified3DU32S32:
    301   case NVPTXISD::TexUnified3DU32Float:
    302   case NVPTXISD::TexUnified3DU32FloatLevel:
    303   case NVPTXISD::TexUnified3DU32FloatGrad:
    304   case NVPTXISD::TexUnifiedCubeFloatFloat:
    305   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    306   case NVPTXISD::TexUnifiedCubeS32Float:
    307   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    308   case NVPTXISD::TexUnifiedCubeU32Float:
    309   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    310   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    311   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    312   case NVPTXISD::TexUnifiedCubeArrayS32Float:
    313   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    314   case NVPTXISD::TexUnifiedCubeArrayU32Float:
    315   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    316   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    317   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    318   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    319   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    320   case NVPTXISD::Tld4UnifiedR2DS64Float:
    321   case NVPTXISD::Tld4UnifiedG2DS64Float:
    322   case NVPTXISD::Tld4UnifiedB2DS64Float:
    323   case NVPTXISD::Tld4UnifiedA2DS64Float:
    324   case NVPTXISD::Tld4UnifiedR2DU64Float:
    325   case NVPTXISD::Tld4UnifiedG2DU64Float:
    326   case NVPTXISD::Tld4UnifiedB2DU64Float:
    327   case NVPTXISD::Tld4UnifiedA2DU64Float:
    328     ResNode = SelectTextureIntrinsic(N);
    329     break;
    330   case NVPTXISD::Suld1DI8Clamp:
    331   case NVPTXISD::Suld1DI16Clamp:
    332   case NVPTXISD::Suld1DI32Clamp:
    333   case NVPTXISD::Suld1DI64Clamp:
    334   case NVPTXISD::Suld1DV2I8Clamp:
    335   case NVPTXISD::Suld1DV2I16Clamp:
    336   case NVPTXISD::Suld1DV2I32Clamp:
    337   case NVPTXISD::Suld1DV2I64Clamp:
    338   case NVPTXISD::Suld1DV4I8Clamp:
    339   case NVPTXISD::Suld1DV4I16Clamp:
    340   case NVPTXISD::Suld1DV4I32Clamp:
    341   case NVPTXISD::Suld1DArrayI8Clamp:
    342   case NVPTXISD::Suld1DArrayI16Clamp:
    343   case NVPTXISD::Suld1DArrayI32Clamp:
    344   case NVPTXISD::Suld1DArrayI64Clamp:
    345   case NVPTXISD::Suld1DArrayV2I8Clamp:
    346   case NVPTXISD::Suld1DArrayV2I16Clamp:
    347   case NVPTXISD::Suld1DArrayV2I32Clamp:
    348   case NVPTXISD::Suld1DArrayV2I64Clamp:
    349   case NVPTXISD::Suld1DArrayV4I8Clamp:
    350   case NVPTXISD::Suld1DArrayV4I16Clamp:
    351   case NVPTXISD::Suld1DArrayV4I32Clamp:
    352   case NVPTXISD::Suld2DI8Clamp:
    353   case NVPTXISD::Suld2DI16Clamp:
    354   case NVPTXISD::Suld2DI32Clamp:
    355   case NVPTXISD::Suld2DI64Clamp:
    356   case NVPTXISD::Suld2DV2I8Clamp:
    357   case NVPTXISD::Suld2DV2I16Clamp:
    358   case NVPTXISD::Suld2DV2I32Clamp:
    359   case NVPTXISD::Suld2DV2I64Clamp:
    360   case NVPTXISD::Suld2DV4I8Clamp:
    361   case NVPTXISD::Suld2DV4I16Clamp:
    362   case NVPTXISD::Suld2DV4I32Clamp:
    363   case NVPTXISD::Suld2DArrayI8Clamp:
    364   case NVPTXISD::Suld2DArrayI16Clamp:
    365   case NVPTXISD::Suld2DArrayI32Clamp:
    366   case NVPTXISD::Suld2DArrayI64Clamp:
    367   case NVPTXISD::Suld2DArrayV2I8Clamp:
    368   case NVPTXISD::Suld2DArrayV2I16Clamp:
    369   case NVPTXISD::Suld2DArrayV2I32Clamp:
    370   case NVPTXISD::Suld2DArrayV2I64Clamp:
    371   case NVPTXISD::Suld2DArrayV4I8Clamp:
    372   case NVPTXISD::Suld2DArrayV4I16Clamp:
    373   case NVPTXISD::Suld2DArrayV4I32Clamp:
    374   case NVPTXISD::Suld3DI8Clamp:
    375   case NVPTXISD::Suld3DI16Clamp:
    376   case NVPTXISD::Suld3DI32Clamp:
    377   case NVPTXISD::Suld3DI64Clamp:
    378   case NVPTXISD::Suld3DV2I8Clamp:
    379   case NVPTXISD::Suld3DV2I16Clamp:
    380   case NVPTXISD::Suld3DV2I32Clamp:
    381   case NVPTXISD::Suld3DV2I64Clamp:
    382   case NVPTXISD::Suld3DV4I8Clamp:
    383   case NVPTXISD::Suld3DV4I16Clamp:
    384   case NVPTXISD::Suld3DV4I32Clamp:
    385   case NVPTXISD::Suld1DI8Trap:
    386   case NVPTXISD::Suld1DI16Trap:
    387   case NVPTXISD::Suld1DI32Trap:
    388   case NVPTXISD::Suld1DI64Trap:
    389   case NVPTXISD::Suld1DV2I8Trap:
    390   case NVPTXISD::Suld1DV2I16Trap:
    391   case NVPTXISD::Suld1DV2I32Trap:
    392   case NVPTXISD::Suld1DV2I64Trap:
    393   case NVPTXISD::Suld1DV4I8Trap:
    394   case NVPTXISD::Suld1DV4I16Trap:
    395   case NVPTXISD::Suld1DV4I32Trap:
    396   case NVPTXISD::Suld1DArrayI8Trap:
    397   case NVPTXISD::Suld1DArrayI16Trap:
    398   case NVPTXISD::Suld1DArrayI32Trap:
    399   case NVPTXISD::Suld1DArrayI64Trap:
    400   case NVPTXISD::Suld1DArrayV2I8Trap:
    401   case NVPTXISD::Suld1DArrayV2I16Trap:
    402   case NVPTXISD::Suld1DArrayV2I32Trap:
    403   case NVPTXISD::Suld1DArrayV2I64Trap:
    404   case NVPTXISD::Suld1DArrayV4I8Trap:
    405   case NVPTXISD::Suld1DArrayV4I16Trap:
    406   case NVPTXISD::Suld1DArrayV4I32Trap:
    407   case NVPTXISD::Suld2DI8Trap:
    408   case NVPTXISD::Suld2DI16Trap:
    409   case NVPTXISD::Suld2DI32Trap:
    410   case NVPTXISD::Suld2DI64Trap:
    411   case NVPTXISD::Suld2DV2I8Trap:
    412   case NVPTXISD::Suld2DV2I16Trap:
    413   case NVPTXISD::Suld2DV2I32Trap:
    414   case NVPTXISD::Suld2DV2I64Trap:
    415   case NVPTXISD::Suld2DV4I8Trap:
    416   case NVPTXISD::Suld2DV4I16Trap:
    417   case NVPTXISD::Suld2DV4I32Trap:
    418   case NVPTXISD::Suld2DArrayI8Trap:
    419   case NVPTXISD::Suld2DArrayI16Trap:
    420   case NVPTXISD::Suld2DArrayI32Trap:
    421   case NVPTXISD::Suld2DArrayI64Trap:
    422   case NVPTXISD::Suld2DArrayV2I8Trap:
    423   case NVPTXISD::Suld2DArrayV2I16Trap:
    424   case NVPTXISD::Suld2DArrayV2I32Trap:
    425   case NVPTXISD::Suld2DArrayV2I64Trap:
    426   case NVPTXISD::Suld2DArrayV4I8Trap:
    427   case NVPTXISD::Suld2DArrayV4I16Trap:
    428   case NVPTXISD::Suld2DArrayV4I32Trap:
    429   case NVPTXISD::Suld3DI8Trap:
    430   case NVPTXISD::Suld3DI16Trap:
    431   case NVPTXISD::Suld3DI32Trap:
    432   case NVPTXISD::Suld3DI64Trap:
    433   case NVPTXISD::Suld3DV2I8Trap:
    434   case NVPTXISD::Suld3DV2I16Trap:
    435   case NVPTXISD::Suld3DV2I32Trap:
    436   case NVPTXISD::Suld3DV2I64Trap:
    437   case NVPTXISD::Suld3DV4I8Trap:
    438   case NVPTXISD::Suld3DV4I16Trap:
    439   case NVPTXISD::Suld3DV4I32Trap:
    440   case NVPTXISD::Suld1DI8Zero:
    441   case NVPTXISD::Suld1DI16Zero:
    442   case NVPTXISD::Suld1DI32Zero:
    443   case NVPTXISD::Suld1DI64Zero:
    444   case NVPTXISD::Suld1DV2I8Zero:
    445   case NVPTXISD::Suld1DV2I16Zero:
    446   case NVPTXISD::Suld1DV2I32Zero:
    447   case NVPTXISD::Suld1DV2I64Zero:
    448   case NVPTXISD::Suld1DV4I8Zero:
    449   case NVPTXISD::Suld1DV4I16Zero:
    450   case NVPTXISD::Suld1DV4I32Zero:
    451   case NVPTXISD::Suld1DArrayI8Zero:
    452   case NVPTXISD::Suld1DArrayI16Zero:
    453   case NVPTXISD::Suld1DArrayI32Zero:
    454   case NVPTXISD::Suld1DArrayI64Zero:
    455   case NVPTXISD::Suld1DArrayV2I8Zero:
    456   case NVPTXISD::Suld1DArrayV2I16Zero:
    457   case NVPTXISD::Suld1DArrayV2I32Zero:
    458   case NVPTXISD::Suld1DArrayV2I64Zero:
    459   case NVPTXISD::Suld1DArrayV4I8Zero:
    460   case NVPTXISD::Suld1DArrayV4I16Zero:
    461   case NVPTXISD::Suld1DArrayV4I32Zero:
    462   case NVPTXISD::Suld2DI8Zero:
    463   case NVPTXISD::Suld2DI16Zero:
    464   case NVPTXISD::Suld2DI32Zero:
    465   case NVPTXISD::Suld2DI64Zero:
    466   case NVPTXISD::Suld2DV2I8Zero:
    467   case NVPTXISD::Suld2DV2I16Zero:
    468   case NVPTXISD::Suld2DV2I32Zero:
    469   case NVPTXISD::Suld2DV2I64Zero:
    470   case NVPTXISD::Suld2DV4I8Zero:
    471   case NVPTXISD::Suld2DV4I16Zero:
    472   case NVPTXISD::Suld2DV4I32Zero:
    473   case NVPTXISD::Suld2DArrayI8Zero:
    474   case NVPTXISD::Suld2DArrayI16Zero:
    475   case NVPTXISD::Suld2DArrayI32Zero:
    476   case NVPTXISD::Suld2DArrayI64Zero:
    477   case NVPTXISD::Suld2DArrayV2I8Zero:
    478   case NVPTXISD::Suld2DArrayV2I16Zero:
    479   case NVPTXISD::Suld2DArrayV2I32Zero:
    480   case NVPTXISD::Suld2DArrayV2I64Zero:
    481   case NVPTXISD::Suld2DArrayV4I8Zero:
    482   case NVPTXISD::Suld2DArrayV4I16Zero:
    483   case NVPTXISD::Suld2DArrayV4I32Zero:
    484   case NVPTXISD::Suld3DI8Zero:
    485   case NVPTXISD::Suld3DI16Zero:
    486   case NVPTXISD::Suld3DI32Zero:
    487   case NVPTXISD::Suld3DI64Zero:
    488   case NVPTXISD::Suld3DV2I8Zero:
    489   case NVPTXISD::Suld3DV2I16Zero:
    490   case NVPTXISD::Suld3DV2I32Zero:
    491   case NVPTXISD::Suld3DV2I64Zero:
    492   case NVPTXISD::Suld3DV4I8Zero:
    493   case NVPTXISD::Suld3DV4I16Zero:
    494   case NVPTXISD::Suld3DV4I32Zero:
    495     ResNode = SelectSurfaceIntrinsic(N);
    496     break;
    497   case ISD::AND:
    498   case ISD::SRA:
    499   case ISD::SRL:
    500     // Try to select BFE
    501     ResNode = SelectBFE(N);
    502     break;
    503   case ISD::ADDRSPACECAST:
    504     ResNode = SelectAddrSpaceCast(N);
    505     break;
    506   default:
    507     break;
    508   }
    509   if (ResNode)
    510     return ResNode;
    511   return SelectCode(N);
    512 }
    513 
    514 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
    515   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    516   switch (IID) {
    517   default:
    518     return NULL;
    519   case Intrinsic::nvvm_ldg_global_f:
    520   case Intrinsic::nvvm_ldg_global_i:
    521   case Intrinsic::nvvm_ldg_global_p:
    522   case Intrinsic::nvvm_ldu_global_f:
    523   case Intrinsic::nvvm_ldu_global_i:
    524   case Intrinsic::nvvm_ldu_global_p:
    525     return SelectLDGLDU(N);
    526   }
    527 }
    528 
    529 static unsigned int getCodeAddrSpace(MemSDNode *N) {
    530   const Value *Src = N->getMemOperand()->getValue();
    531 
    532   if (!Src)
    533     return NVPTX::PTXLdStInstCode::GENERIC;
    534 
    535   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    536     switch (PT->getAddressSpace()) {
    537     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    538     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    539     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    540     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    541     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    542     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    543     default: break;
    544     }
    545   }
    546   return NVPTX::PTXLdStInstCode::GENERIC;
    547 }
    548 
    549 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
    550                           unsigned CodeAddrSpace, MachineFunction *F) {
    551   // To use non-coherent caching, the load has to be from global
    552   // memory and we have to prove that the memory area is not written
    553   // to anywhere for the duration of the kernel call, not even after
    554   // the load.
    555   //
    556   // To ensure that there are no writes to the memory, we require the
    557   // underlying pointer to be a noalias (__restrict) kernel parameter
    558   // that is never used for a write. We can only do this for kernel
    559   // functions since from within a device function, we cannot know if
    560   // there were or will be writes to the memory from the caller - or we
    561   // could, but then we would have to do inter-procedural analysis.
    562   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
    563       !isKernelFunction(*F->getFunction())) {
    564     return false;
    565   }
    566 
    567   // We use GetUnderlyingObjects() here instead of
    568   // GetUnderlyingObject() mainly because the former looks through phi
    569   // nodes while the latter does not. We need to look through phi
    570   // nodes to handle pointer induction variables.
    571   SmallVector<Value *, 8> Objs;
    572   GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
    573                        Objs, F->getDataLayout());
    574   for (Value *Obj : Objs) {
    575     auto *A = dyn_cast<const Argument>(Obj);
    576     if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
    577   }
    578 
    579   return true;
    580 }
    581 
    582 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
    583   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    584   switch (IID) {
    585   default:
    586     return nullptr;
    587   case Intrinsic::nvvm_texsurf_handle_internal:
    588     return SelectTexSurfHandle(N);
    589   }
    590 }
    591 
    592 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
    593   // Op 0 is the intrinsic ID
    594   SDValue Wrapper = N->getOperand(1);
    595   SDValue GlobalVal = Wrapper.getOperand(0);
    596   return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
    597                                 GlobalVal);
    598 }
    599 
    600 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
    601   SDValue Src = N->getOperand(0);
    602   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
    603   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
    604   unsigned DstAddrSpace = CastN->getDestAddressSpace();
    605 
    606   assert(SrcAddrSpace != DstAddrSpace &&
    607          "addrspacecast must be between different address spaces");
    608 
    609   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    610     // Specific to generic
    611     unsigned Opc;
    612     switch (SrcAddrSpace) {
    613     default: report_fatal_error("Bad address space in addrspacecast");
    614     case ADDRESS_SPACE_GLOBAL:
    615       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
    616       break;
    617     case ADDRESS_SPACE_SHARED:
    618       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
    619       break;
    620     case ADDRESS_SPACE_CONST:
    621       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
    622       break;
    623     case ADDRESS_SPACE_LOCAL:
    624       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
    625       break;
    626     }
    627     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    628   } else {
    629     // Generic to specific
    630     if (SrcAddrSpace != 0)
    631       report_fatal_error("Cannot cast between two non-generic address spaces");
    632     unsigned Opc;
    633     switch (DstAddrSpace) {
    634     default: report_fatal_error("Bad address space in addrspacecast");
    635     case ADDRESS_SPACE_GLOBAL:
    636       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
    637                          : NVPTX::cvta_to_global_yes;
    638       break;
    639     case ADDRESS_SPACE_SHARED:
    640       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
    641                          : NVPTX::cvta_to_shared_yes;
    642       break;
    643     case ADDRESS_SPACE_CONST:
    644       Opc =
    645           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
    646       break;
    647     case ADDRESS_SPACE_LOCAL:
    648       Opc =
    649           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
    650       break;
    651     case ADDRESS_SPACE_PARAM:
    652       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
    653                          : NVPTX::nvvm_ptr_gen_to_param;
    654       break;
    655     }
    656     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
    657   }
    658 }
    659 
    660 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    661   SDLoc dl(N);
    662   LoadSDNode *LD = cast<LoadSDNode>(N);
    663   EVT LoadedVT = LD->getMemoryVT();
    664   SDNode *NVPTXLD = nullptr;
    665 
    666   // do not support pre/post inc/dec
    667   if (LD->isIndexed())
    668     return nullptr;
    669 
    670   if (!LoadedVT.isSimple())
    671     return nullptr;
    672 
    673   // Address Space Setting
    674   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
    675 
    676   if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
    677     return SelectLDGLDU(N);
    678   }
    679 
    680   // Volatile Setting
    681   // - .volatile is only availalble for .global and .shared
    682   bool isVolatile = LD->isVolatile();
    683   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    684       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    685       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    686     isVolatile = false;
    687 
    688   // Vector Setting
    689   MVT SimpleVT = LoadedVT.getSimpleVT();
    690   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    691   if (SimpleVT.isVector()) {
    692     unsigned num = SimpleVT.getVectorNumElements();
    693     if (num == 2)
    694       vecType = NVPTX::PTXLdStInstCode::V2;
    695     else if (num == 4)
    696       vecType = NVPTX::PTXLdStInstCode::V4;
    697     else
    698       return nullptr;
    699   }
    700 
    701   // Type Setting: fromType + fromTypeWidth
    702   //
    703   // Sign   : ISD::SEXTLOAD
    704   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    705   //          type is integer
    706   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    707   MVT ScalarVT = SimpleVT.getScalarType();
    708   // Read at least 8 bits (predicates are stored as 8-bit values)
    709   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    710   unsigned int fromType;
    711   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    712     fromType = NVPTX::PTXLdStInstCode::Signed;
    713   else if (ScalarVT.isFloatingPoint())
    714     fromType = NVPTX::PTXLdStInstCode::Float;
    715   else
    716     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    717 
    718   // Create the machine instruction DAG
    719   SDValue Chain = N->getOperand(0);
    720   SDValue N1 = N->getOperand(1);
    721   SDValue Addr;
    722   SDValue Offset, Base;
    723   unsigned Opcode;
    724   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
    725 
    726   if (SelectDirectAddr(N1, Addr)) {
    727     switch (TargetVT) {
    728     case MVT::i8:
    729       Opcode = NVPTX::LD_i8_avar;
    730       break;
    731     case MVT::i16:
    732       Opcode = NVPTX::LD_i16_avar;
    733       break;
    734     case MVT::i32:
    735       Opcode = NVPTX::LD_i32_avar;
    736       break;
    737     case MVT::i64:
    738       Opcode = NVPTX::LD_i64_avar;
    739       break;
    740     case MVT::f32:
    741       Opcode = NVPTX::LD_f32_avar;
    742       break;
    743     case MVT::f64:
    744       Opcode = NVPTX::LD_f64_avar;
    745       break;
    746     default:
    747       return nullptr;
    748     }
    749     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    750                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    751                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
    752     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    753   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
    754                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    755     switch (TargetVT) {
    756     case MVT::i8:
    757       Opcode = NVPTX::LD_i8_asi;
    758       break;
    759     case MVT::i16:
    760       Opcode = NVPTX::LD_i16_asi;
    761       break;
    762     case MVT::i32:
    763       Opcode = NVPTX::LD_i32_asi;
    764       break;
    765     case MVT::i64:
    766       Opcode = NVPTX::LD_i64_asi;
    767       break;
    768     case MVT::f32:
    769       Opcode = NVPTX::LD_f32_asi;
    770       break;
    771     case MVT::f64:
    772       Opcode = NVPTX::LD_f64_asi;
    773       break;
    774     default:
    775       return nullptr;
    776     }
    777     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    778                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    779                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    780     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    781   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
    782                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    783     if (TM.is64Bit()) {
    784       switch (TargetVT) {
    785       case MVT::i8:
    786         Opcode = NVPTX::LD_i8_ari_64;
    787         break;
    788       case MVT::i16:
    789         Opcode = NVPTX::LD_i16_ari_64;
    790         break;
    791       case MVT::i32:
    792         Opcode = NVPTX::LD_i32_ari_64;
    793         break;
    794       case MVT::i64:
    795         Opcode = NVPTX::LD_i64_ari_64;
    796         break;
    797       case MVT::f32:
    798         Opcode = NVPTX::LD_f32_ari_64;
    799         break;
    800       case MVT::f64:
    801         Opcode = NVPTX::LD_f64_ari_64;
    802         break;
    803       default:
    804         return nullptr;
    805       }
    806     } else {
    807       switch (TargetVT) {
    808       case MVT::i8:
    809         Opcode = NVPTX::LD_i8_ari;
    810         break;
    811       case MVT::i16:
    812         Opcode = NVPTX::LD_i16_ari;
    813         break;
    814       case MVT::i32:
    815         Opcode = NVPTX::LD_i32_ari;
    816         break;
    817       case MVT::i64:
    818         Opcode = NVPTX::LD_i64_ari;
    819         break;
    820       case MVT::f32:
    821         Opcode = NVPTX::LD_f32_ari;
    822         break;
    823       case MVT::f64:
    824         Opcode = NVPTX::LD_f64_ari;
    825         break;
    826       default:
    827         return nullptr;
    828       }
    829     }
    830     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    831                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    832                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    833     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    834   } else {
    835     if (TM.is64Bit()) {
    836       switch (TargetVT) {
    837       case MVT::i8:
    838         Opcode = NVPTX::LD_i8_areg_64;
    839         break;
    840       case MVT::i16:
    841         Opcode = NVPTX::LD_i16_areg_64;
    842         break;
    843       case MVT::i32:
    844         Opcode = NVPTX::LD_i32_areg_64;
    845         break;
    846       case MVT::i64:
    847         Opcode = NVPTX::LD_i64_areg_64;
    848         break;
    849       case MVT::f32:
    850         Opcode = NVPTX::LD_f32_areg_64;
    851         break;
    852       case MVT::f64:
    853         Opcode = NVPTX::LD_f64_areg_64;
    854         break;
    855       default:
    856         return nullptr;
    857       }
    858     } else {
    859       switch (TargetVT) {
    860       case MVT::i8:
    861         Opcode = NVPTX::LD_i8_areg;
    862         break;
    863       case MVT::i16:
    864         Opcode = NVPTX::LD_i16_areg;
    865         break;
    866       case MVT::i32:
    867         Opcode = NVPTX::LD_i32_areg;
    868         break;
    869       case MVT::i64:
    870         Opcode = NVPTX::LD_i64_areg;
    871         break;
    872       case MVT::f32:
    873         Opcode = NVPTX::LD_f32_areg;
    874         break;
    875       case MVT::f64:
    876         Opcode = NVPTX::LD_f64_areg;
    877         break;
    878       default:
    879         return nullptr;
    880       }
    881     }
    882     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    883                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    884                       getI32Imm(fromTypeWidth, dl), N1, Chain };
    885     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    886   }
    887 
    888   if (NVPTXLD) {
    889     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    890     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    891     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    892   }
    893 
    894   return NVPTXLD;
    895 }
    896 
    897 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
          // Selects a target machine node for an NVPTXISD::LoadV2 or
          // NVPTXISD::LoadV4 node.
          //
          // Returns the new machine node, the result of SelectLDGLDU(N) when
          // canLowerToLDG() accepts the access, or nullptr when the memory type is
          // not simple, the opcode is neither LoadV2 nor LoadV4, or the element type
          // has no opcode in the tables below.
    898 
          // Operand 0 is used as the chain and operand 1 as the address.
    899   SDValue Chain = N->getOperand(0);
    900   SDValue Op1 = N->getOperand(1);
    901   SDValue Addr, Offset, Base;
    902   unsigned Opcode;
    903   SDLoc DL(N);
    904   SDNode *LD;
    905   MemSDNode *MemSD = cast<MemSDNode>(N);
    906   EVT LoadedVT = MemSD->getMemoryVT();
    907 
          // getSimpleVT() is called on LoadedVT below, so bail out early if the
          // memory type is not a simple value type.
    908   if (!LoadedVT.isSimple())
    909     return nullptr;
    910 
    911   // Address Space Setting
    912   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
    913 
          // Hand the node to the LDG/LDU selector when canLowerToLDG() allows it.
    914   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    915     return SelectLDGLDU(N);
    916   }
    917 
    918   // Volatile Setting
    919   // - the volatile flag is kept only for the GLOBAL, SHARED and GENERIC
          //   address space codes; for any other code it is cleared.
    920   bool IsVolatile = MemSD->isVolatile();
    921   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    922       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    923       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    924     IsVolatile = false;
    925 
    926   // Vector Setting
    927   MVT SimpleVT = LoadedVT.getSimpleVT();
    928 
    929   // Type Setting: fromType + fromTypeWidth
    930   //
    931   // Sign   : ISD::SEXTLOAD
    932   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    933   //          type is integer
    934   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    935   MVT ScalarVT = SimpleVT.getScalarType();
    936   // Read at least 8 bits (predicates are stored as 8-bit values)
    937   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    938   unsigned int FromType;
    939   // The last operand holds the original LoadSDNode::getExtensionType() value
    940   unsigned ExtensionType = cast<ConstantSDNode>(
    941       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
          // The sign-extension check comes first, so a SEXTLOAD is always tagged
          // Signed regardless of the scalar type.
    942   if (ExtensionType == ISD::SEXTLOAD)
    943     FromType = NVPTX::PTXLdStInstCode::Signed;
    944   else if (ScalarVT.isFloatingPoint())
    945     FromType = NVPTX::PTXLdStInstCode::Float;
    946   else
    947     FromType = NVPTX::PTXLdStInstCode::Unsigned;
    948 
    949   unsigned VecType;
    950 
          // Map the node opcode to the V2/V4 vector code; anything else is rejected.
    951   switch (N->getOpcode()) {
    952   case NVPTXISD::LoadV2:
    953     VecType = NVPTX::PTXLdStInstCode::V2;
    954     break;
    955   case NVPTXISD::LoadV4:
    956     VecType = NVPTX::PTXLdStInstCode::V4;
    957     break;
    958   default:
    959     return nullptr;
    960   }
    961 
          // The type of the node's first result value selects the opcode in every
          // table below.
    962   EVT EltVT = N->getValueType(0);
    963 
          // The address selectors are tried in a fixed order: SelectDirectAddr,
          // then SelectADDRsi[64], then SelectADDRri[64], and finally the plain
          // address operand as a fallback. Each branch picks from its own opcode
          // family (_avar, _asi, _ari[_64], _areg[_64]).
          // NOTE(review): the suffixes presumably name the addressing form of the
          // instruction -- confirm against the instruction definitions.
          //
          // In every branch the LoadV4 tables list only i8, i16, i32 and f32, so a
          // LoadV4 of i64 or f64 falls to the default case and returns nullptr.
    964   if (SelectDirectAddr(Op1, Addr)) {
    965     switch (N->getOpcode()) {
    966     default:
    967       return nullptr;
    968     case NVPTXISD::LoadV2:
    969       switch (EltVT.getSimpleVT().SimpleTy) {
    970       default:
    971         return nullptr;
    972       case MVT::i8:
    973         Opcode = NVPTX::LDV_i8_v2_avar;
    974         break;
    975       case MVT::i16:
    976         Opcode = NVPTX::LDV_i16_v2_avar;
    977         break;
    978       case MVT::i32:
    979         Opcode = NVPTX::LDV_i32_v2_avar;
    980         break;
    981       case MVT::i64:
    982         Opcode = NVPTX::LDV_i64_v2_avar;
    983         break;
    984       case MVT::f32:
    985         Opcode = NVPTX::LDV_f32_v2_avar;
    986         break;
    987       case MVT::f64:
    988         Opcode = NVPTX::LDV_f64_v2_avar;
    989         break;
    990       }
    991       break;
    992     case NVPTXISD::LoadV4:
    993       switch (EltVT.getSimpleVT().SimpleTy) {
    994       default:
    995         return nullptr;
    996       case MVT::i8:
    997         Opcode = NVPTX::LDV_i8_v4_avar;
    998         break;
    999       case MVT::i16:
   1000         Opcode = NVPTX::LDV_i16_v4_avar;
   1001         break;
   1002       case MVT::i32:
   1003         Opcode = NVPTX::LDV_i32_v4_avar;
   1004         break;
   1005       case MVT::f32:
   1006         Opcode = NVPTX::LDV_f32_v4_avar;
   1007         break;
   1008       }
   1009       break;
   1010     }
   1011 
            // Operand layout: volatile flag, address space code, vector code,
            // from-type, from-type width, the address, and finally the chain.
   1012     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1013                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1014                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
   1015     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1016   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
   1017                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
            // The _asi opcodes are shared between 32-bit and 64-bit targets; only
            // the selector differs.
   1018     switch (N->getOpcode()) {
   1019     default:
   1020       return nullptr;
   1021     case NVPTXISD::LoadV2:
   1022       switch (EltVT.getSimpleVT().SimpleTy) {
   1023       default:
   1024         return nullptr;
   1025       case MVT::i8:
   1026         Opcode = NVPTX::LDV_i8_v2_asi;
   1027         break;
   1028       case MVT::i16:
   1029         Opcode = NVPTX::LDV_i16_v2_asi;
   1030         break;
   1031       case MVT::i32:
   1032         Opcode = NVPTX::LDV_i32_v2_asi;
   1033         break;
   1034       case MVT::i64:
   1035         Opcode = NVPTX::LDV_i64_v2_asi;
   1036         break;
   1037       case MVT::f32:
   1038         Opcode = NVPTX::LDV_f32_v2_asi;
   1039         break;
   1040       case MVT::f64:
   1041         Opcode = NVPTX::LDV_f64_v2_asi;
   1042         break;
   1043       }
   1044       break;
   1045     case NVPTXISD::LoadV4:
   1046       switch (EltVT.getSimpleVT().SimpleTy) {
   1047       default:
   1048         return nullptr;
   1049       case MVT::i8:
   1050         Opcode = NVPTX::LDV_i8_v4_asi;
   1051         break;
   1052       case MVT::i16:
   1053         Opcode = NVPTX::LDV_i16_v4_asi;
   1054         break;
   1055       case MVT::i32:
   1056         Opcode = NVPTX::LDV_i32_v4_asi;
   1057         break;
   1058       case MVT::f32:
   1059         Opcode = NVPTX::LDV_f32_v4_asi;
   1060         break;
   1061       }
   1062       break;
   1063     }
   1064 
            // Same layout as above, with the address given as Base + Offset.
   1065     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1066                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1067                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
   1068     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1069   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1070                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
            // Unlike the _asi family, the _ari opcodes have separate _64 variants
            // for 64-bit targets.
   1071     if (TM.is64Bit()) {
   1072       switch (N->getOpcode()) {
   1073       default:
   1074         return nullptr;
   1075       case NVPTXISD::LoadV2:
   1076         switch (EltVT.getSimpleVT().SimpleTy) {
   1077         default:
   1078           return nullptr;
   1079         case MVT::i8:
   1080           Opcode = NVPTX::LDV_i8_v2_ari_64;
   1081           break;
   1082         case MVT::i16:
   1083           Opcode = NVPTX::LDV_i16_v2_ari_64;
   1084           break;
   1085         case MVT::i32:
   1086           Opcode = NVPTX::LDV_i32_v2_ari_64;
   1087           break;
   1088         case MVT::i64:
   1089           Opcode = NVPTX::LDV_i64_v2_ari_64;
   1090           break;
   1091         case MVT::f32:
   1092           Opcode = NVPTX::LDV_f32_v2_ari_64;
   1093           break;
   1094         case MVT::f64:
   1095           Opcode = NVPTX::LDV_f64_v2_ari_64;
   1096           break;
   1097         }
   1098         break;
   1099       case NVPTXISD::LoadV4:
   1100         switch (EltVT.getSimpleVT().SimpleTy) {
   1101         default:
   1102           return nullptr;
   1103         case MVT::i8:
   1104           Opcode = NVPTX::LDV_i8_v4_ari_64;
   1105           break;
   1106         case MVT::i16:
   1107           Opcode = NVPTX::LDV_i16_v4_ari_64;
   1108           break;
   1109         case MVT::i32:
   1110           Opcode = NVPTX::LDV_i32_v4_ari_64;
   1111           break;
   1112         case MVT::f32:
   1113           Opcode = NVPTX::LDV_f32_v4_ari_64;
   1114           break;
   1115         }
   1116         break;
   1117       }
   1118     } else {
   1119       switch (N->getOpcode()) {
   1120       default:
   1121         return nullptr;
   1122       case NVPTXISD::LoadV2:
   1123         switch (EltVT.getSimpleVT().SimpleTy) {
   1124         default:
   1125           return nullptr;
   1126         case MVT::i8:
   1127           Opcode = NVPTX::LDV_i8_v2_ari;
   1128           break;
   1129         case MVT::i16:
   1130           Opcode = NVPTX::LDV_i16_v2_ari;
   1131           break;
   1132         case MVT::i32:
   1133           Opcode = NVPTX::LDV_i32_v2_ari;
   1134           break;
   1135         case MVT::i64:
   1136           Opcode = NVPTX::LDV_i64_v2_ari;
   1137           break;
   1138         case MVT::f32:
   1139           Opcode = NVPTX::LDV_f32_v2_ari;
   1140           break;
   1141         case MVT::f64:
   1142           Opcode = NVPTX::LDV_f64_v2_ari;
   1143           break;
   1144         }
   1145         break;
   1146       case NVPTXISD::LoadV4:
   1147         switch (EltVT.getSimpleVT().SimpleTy) {
   1148         default:
   1149           return nullptr;
   1150         case MVT::i8:
   1151           Opcode = NVPTX::LDV_i8_v4_ari;
   1152           break;
   1153         case MVT::i16:
   1154           Opcode = NVPTX::LDV_i16_v4_ari;
   1155           break;
   1156         case MVT::i32:
   1157           Opcode = NVPTX::LDV_i32_v4_ari;
   1158           break;
   1159         case MVT::f32:
   1160           Opcode = NVPTX::LDV_f32_v4_ari;
   1161           break;
   1162         }
   1163         break;
   1164       }
   1165     }
   1166 
   1167     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1168                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1169                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
   1170 
   1171     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1172   } else {
            // No selector matched: the original address operand (Op1) is passed
            // through unchanged, using the _areg / _areg_64 opcodes.
   1173     if (TM.is64Bit()) {
   1174       switch (N->getOpcode()) {
   1175       default:
   1176         return nullptr;
   1177       case NVPTXISD::LoadV2:
   1178         switch (EltVT.getSimpleVT().SimpleTy) {
   1179         default:
   1180           return nullptr;
   1181         case MVT::i8:
   1182           Opcode = NVPTX::LDV_i8_v2_areg_64;
   1183           break;
   1184         case MVT::i16:
   1185           Opcode = NVPTX::LDV_i16_v2_areg_64;
   1186           break;
   1187         case MVT::i32:
   1188           Opcode = NVPTX::LDV_i32_v2_areg_64;
   1189           break;
   1190         case MVT::i64:
   1191           Opcode = NVPTX::LDV_i64_v2_areg_64;
   1192           break;
   1193         case MVT::f32:
   1194           Opcode = NVPTX::LDV_f32_v2_areg_64;
   1195           break;
   1196         case MVT::f64:
   1197           Opcode = NVPTX::LDV_f64_v2_areg_64;
   1198           break;
   1199         }
   1200         break;
   1201       case NVPTXISD::LoadV4:
   1202         switch (EltVT.getSimpleVT().SimpleTy) {
   1203         default:
   1204           return nullptr;
   1205         case MVT::i8:
   1206           Opcode = NVPTX::LDV_i8_v4_areg_64;
   1207           break;
   1208         case MVT::i16:
   1209           Opcode = NVPTX::LDV_i16_v4_areg_64;
   1210           break;
   1211         case MVT::i32:
   1212           Opcode = NVPTX::LDV_i32_v4_areg_64;
   1213           break;
   1214         case MVT::f32:
   1215           Opcode = NVPTX::LDV_f32_v4_areg_64;
   1216           break;
   1217         }
   1218         break;
   1219       }
   1220     } else {
   1221       switch (N->getOpcode()) {
   1222       default:
   1223         return nullptr;
   1224       case NVPTXISD::LoadV2:
   1225         switch (EltVT.getSimpleVT().SimpleTy) {
   1226         default:
   1227           return nullptr;
   1228         case MVT::i8:
   1229           Opcode = NVPTX::LDV_i8_v2_areg;
   1230           break;
   1231         case MVT::i16:
   1232           Opcode = NVPTX::LDV_i16_v2_areg;
   1233           break;
   1234         case MVT::i32:
   1235           Opcode = NVPTX::LDV_i32_v2_areg;
   1236           break;
   1237         case MVT::i64:
   1238           Opcode = NVPTX::LDV_i64_v2_areg;
   1239           break;
   1240         case MVT::f32:
   1241           Opcode = NVPTX::LDV_f32_v2_areg;
   1242           break;
   1243         case MVT::f64:
   1244           Opcode = NVPTX::LDV_f64_v2_areg;
   1245           break;
   1246         }
   1247         break;
   1248       case NVPTXISD::LoadV4:
   1249         switch (EltVT.getSimpleVT().SimpleTy) {
   1250         default:
   1251           return nullptr;
   1252         case MVT::i8:
   1253           Opcode = NVPTX::LDV_i8_v4_areg;
   1254           break;
   1255         case MVT::i16:
   1256           Opcode = NVPTX::LDV_i16_v4_areg;
   1257           break;
   1258         case MVT::i32:
   1259           Opcode = NVPTX::LDV_i32_v4_areg;
   1260           break;
   1261         case MVT::f32:
   1262           Opcode = NVPTX::LDV_f32_v4_areg;
   1263           break;
   1264         }
   1265         break;
   1266       }
   1267     }
   1268 
   1269     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1270                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1271                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
   1272     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1273   }
   1274 
          // Every path that reaches this point has assigned LD. Copy N's single
          // memory operand onto the new machine node.
   1275   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1276   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1277   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1278 
   1279   return LD;
   1280 }
   1281 
   1282 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
   1283 
   1284   SDValue Chain = N->getOperand(0);
   1285   SDValue Op1;
   1286   MemSDNode *Mem;
   1287   bool IsLDG = true;
   1288 
   1289   // If this is an LDG intrinsic, the address is the third operand. If it's an
   1290   // LDG/LDU SD node (from custom vector handling), then it's the second operand
   1291   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
   1292     Op1 = N->getOperand(2);
   1293     Mem = cast<MemIntrinsicSDNode>(N);
   1294     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   1295     switch (IID) {
   1296     default:
   1297       return NULL;
   1298     case Intrinsic::nvvm_ldg_global_f:
   1299     case Intrinsic::nvvm_ldg_global_i:
   1300     case Intrinsic::nvvm_ldg_global_p:
   1301       IsLDG = true;
   1302       break;
   1303     case Intrinsic::nvvm_ldu_global_f:
   1304     case Intrinsic::nvvm_ldu_global_i:
   1305     case Intrinsic::nvvm_ldu_global_p:
   1306       IsLDG = false;
   1307       break;
   1308     }
   1309   } else {
   1310     Op1 = N->getOperand(1);
   1311     Mem = cast<MemSDNode>(N);
   1312   }
   1313 
   1314   unsigned Opcode;
   1315   SDLoc DL(N);
   1316   SDNode *LD;
   1317   SDValue Base, Offset, Addr;
   1318 
   1319   EVT EltVT = Mem->getMemoryVT();
   1320   if (EltVT.isVector()) {
   1321     EltVT = EltVT.getVectorElementType();
   1322   }
   1323 
   1324   if (SelectDirectAddr(Op1, Addr)) {
   1325     switch (N->getOpcode()) {
   1326     default:
   1327       return nullptr;
   1328     case ISD::INTRINSIC_W_CHAIN:
   1329       if (IsLDG) {
   1330         switch (EltVT.getSimpleVT().SimpleTy) {
   1331         default:
   1332           return nullptr;
   1333         case MVT::i8:
   1334           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
   1335           break;
   1336         case MVT::i16:
   1337           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
   1338           break;
   1339         case MVT::i32:
   1340           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
   1341           break;
   1342         case MVT::i64:
   1343           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
   1344           break;
   1345         case MVT::f32:
   1346           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
   1347           break;
   1348         case MVT::f64:
   1349           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
   1350           break;
   1351         }
   1352       } else {
   1353         switch (EltVT.getSimpleVT().SimpleTy) {
   1354         default:
   1355           return nullptr;
   1356         case MVT::i8:
   1357           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
   1358           break;
   1359         case MVT::i16:
   1360           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
   1361           break;
   1362         case MVT::i32:
   1363           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
   1364           break;
   1365         case MVT::i64:
   1366           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
   1367           break;
   1368         case MVT::f32:
   1369           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
   1370           break;
   1371         case MVT::f64:
   1372           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
   1373           break;
   1374         }
   1375       }
   1376       break;
   1377     case NVPTXISD::LDGV2:
   1378       switch (EltVT.getSimpleVT().SimpleTy) {
   1379       default:
   1380         return nullptr;
   1381       case MVT::i8:
   1382         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
   1383         break;
   1384       case MVT::i16:
   1385         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
   1386         break;
   1387       case MVT::i32:
   1388         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
   1389         break;
   1390       case MVT::i64:
   1391         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
   1392         break;
   1393       case MVT::f32:
   1394         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
   1395         break;
   1396       case MVT::f64:
   1397         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
   1398         break;
   1399       }
   1400       break;
   1401     case NVPTXISD::LDUV2:
   1402       switch (EltVT.getSimpleVT().SimpleTy) {
   1403       default:
   1404         return nullptr;
   1405       case MVT::i8:
   1406         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
   1407         break;
   1408       case MVT::i16:
   1409         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
   1410         break;
   1411       case MVT::i32:
   1412         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
   1413         break;
   1414       case MVT::i64:
   1415         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
   1416         break;
   1417       case MVT::f32:
   1418         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
   1419         break;
   1420       case MVT::f64:
   1421         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
   1422         break;
   1423       }
   1424       break;
   1425     case NVPTXISD::LDGV4:
   1426       switch (EltVT.getSimpleVT().SimpleTy) {
   1427       default:
   1428         return nullptr;
   1429       case MVT::i8:
   1430         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
   1431         break;
   1432       case MVT::i16:
   1433         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
   1434         break;
   1435       case MVT::i32:
   1436         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
   1437         break;
   1438       case MVT::f32:
   1439         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
   1440         break;
   1441       }
   1442       break;
   1443     case NVPTXISD::LDUV4:
   1444       switch (EltVT.getSimpleVT().SimpleTy) {
   1445       default:
   1446         return nullptr;
   1447       case MVT::i8:
   1448         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
   1449         break;
   1450       case MVT::i16:
   1451         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
   1452         break;
   1453       case MVT::i32:
   1454         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
   1455         break;
   1456       case MVT::f32:
   1457         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
   1458         break;
   1459       }
   1460       break;
   1461     }
   1462 
   1463     SDValue Ops[] = { Addr, Chain };
   1464     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1465   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1466                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
   1467     if (TM.is64Bit()) {
   1468       switch (N->getOpcode()) {
   1469       default:
   1470         return nullptr;
   1471       case ISD::LOAD:
   1472       case ISD::INTRINSIC_W_CHAIN:
   1473         if (IsLDG) {
   1474           switch (EltVT.getSimpleVT().SimpleTy) {
   1475           default:
   1476             return nullptr;
   1477           case MVT::i8:
   1478             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
   1479             break;
   1480           case MVT::i16:
   1481             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
   1482             break;
   1483           case MVT::i32:
   1484             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
   1485             break;
   1486           case MVT::i64:
   1487             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
   1488             break;
   1489           case MVT::f32:
   1490             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
   1491             break;
   1492           case MVT::f64:
   1493             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
   1494             break;
   1495           }
   1496         } else {
   1497           switch (EltVT.getSimpleVT().SimpleTy) {
   1498           default:
   1499             return nullptr;
   1500           case MVT::i8:
   1501             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
   1502             break;
   1503           case MVT::i16:
   1504             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
   1505             break;
   1506           case MVT::i32:
   1507             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
   1508             break;
   1509           case MVT::i64:
   1510             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
   1511             break;
   1512           case MVT::f32:
   1513             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
   1514             break;
   1515           case MVT::f64:
   1516             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
   1517             break;
   1518           }
   1519         }
   1520         break;
   1521       case NVPTXISD::LoadV2:
   1522       case NVPTXISD::LDGV2:
   1523         switch (EltVT.getSimpleVT().SimpleTy) {
   1524         default:
   1525           return nullptr;
   1526         case MVT::i8:
   1527           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
   1528           break;
   1529         case MVT::i16:
   1530           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
   1531           break;
   1532         case MVT::i32:
   1533           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
   1534           break;
   1535         case MVT::i64:
   1536           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
   1537           break;
   1538         case MVT::f32:
   1539           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
   1540           break;
   1541         case MVT::f64:
   1542           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
   1543           break;
   1544         }
   1545         break;
   1546       case NVPTXISD::LDUV2:
   1547         switch (EltVT.getSimpleVT().SimpleTy) {
   1548         default:
   1549           return nullptr;
   1550         case MVT::i8:
   1551           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
   1552           break;
   1553         case MVT::i16:
   1554           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
   1555           break;
   1556         case MVT::i32:
   1557           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
   1558           break;
   1559         case MVT::i64:
   1560           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
   1561           break;
   1562         case MVT::f32:
   1563           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
   1564           break;
   1565         case MVT::f64:
   1566           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
   1567           break;
   1568         }
   1569         break;
   1570       case NVPTXISD::LoadV4:
   1571       case NVPTXISD::LDGV4:
   1572         switch (EltVT.getSimpleVT().SimpleTy) {
   1573         default:
   1574           return nullptr;
   1575         case MVT::i8:
   1576           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
   1577           break;
   1578         case MVT::i16:
   1579           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
   1580           break;
   1581         case MVT::i32:
   1582           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
   1583           break;
   1584         case MVT::f32:
   1585           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
   1586           break;
   1587         }
   1588         break;
   1589       case NVPTXISD::LDUV4:
   1590         switch (EltVT.getSimpleVT().SimpleTy) {
   1591         default:
   1592           return nullptr;
   1593         case MVT::i8:
   1594           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
   1595           break;
   1596         case MVT::i16:
   1597           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
   1598           break;
   1599         case MVT::i32:
   1600           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
   1601           break;
   1602         case MVT::f32:
   1603           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
   1604           break;
   1605         }
   1606         break;
   1607       }
   1608     } else {
   1609       switch (N->getOpcode()) {
   1610       default:
   1611         return nullptr;
   1612       case ISD::LOAD:
   1613       case ISD::INTRINSIC_W_CHAIN:
   1614         if (IsLDG) {
   1615           switch (EltVT.getSimpleVT().SimpleTy) {
   1616           default:
   1617             return nullptr;
   1618           case MVT::i8:
   1619             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
   1620             break;
   1621           case MVT::i16:
   1622             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
   1623             break;
   1624           case MVT::i32:
   1625             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
   1626             break;
   1627           case MVT::i64:
   1628             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
   1629             break;
   1630           case MVT::f32:
   1631             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
   1632             break;
   1633           case MVT::f64:
   1634             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
   1635             break;
   1636           }
   1637         } else {
   1638           switch (EltVT.getSimpleVT().SimpleTy) {
   1639           default:
   1640             return nullptr;
   1641           case MVT::i8:
   1642             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
   1643             break;
   1644           case MVT::i16:
   1645             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
   1646             break;
   1647           case MVT::i32:
   1648             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
   1649             break;
   1650           case MVT::i64:
   1651             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
   1652             break;
   1653           case MVT::f32:
   1654             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
   1655             break;
   1656           case MVT::f64:
   1657             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
   1658             break;
   1659           }
   1660         }
   1661         break;
   1662       case NVPTXISD::LoadV2:
   1663       case NVPTXISD::LDGV2:
   1664         switch (EltVT.getSimpleVT().SimpleTy) {
   1665         default:
   1666           return nullptr;
   1667         case MVT::i8:
   1668           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
   1669           break;
   1670         case MVT::i16:
   1671           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
   1672           break;
   1673         case MVT::i32:
   1674           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
   1675           break;
   1676         case MVT::i64:
   1677           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
   1678           break;
   1679         case MVT::f32:
   1680           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
   1681           break;
   1682         case MVT::f64:
   1683           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
   1684           break;
   1685         }
   1686         break;
   1687       case NVPTXISD::LDUV2:
   1688         switch (EltVT.getSimpleVT().SimpleTy) {
   1689         default:
   1690           return nullptr;
   1691         case MVT::i8:
   1692           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
   1693           break;
   1694         case MVT::i16:
   1695           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
   1696           break;
   1697         case MVT::i32:
   1698           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
   1699           break;
   1700         case MVT::i64:
   1701           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
   1702           break;
   1703         case MVT::f32:
   1704           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
   1705           break;
   1706         case MVT::f64:
   1707           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
   1708           break;
   1709         }
   1710         break;
   1711       case NVPTXISD::LoadV4:
   1712       case NVPTXISD::LDGV4:
   1713         switch (EltVT.getSimpleVT().SimpleTy) {
   1714         default:
   1715           return nullptr;
   1716         case MVT::i8:
   1717           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
   1718           break;
   1719         case MVT::i16:
   1720           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
   1721           break;
   1722         case MVT::i32:
   1723           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
   1724           break;
   1725         case MVT::f32:
   1726           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
   1727           break;
   1728         }
   1729         break;
   1730       case NVPTXISD::LDUV4:
   1731         switch (EltVT.getSimpleVT().SimpleTy) {
   1732         default:
   1733           return nullptr;
   1734         case MVT::i8:
   1735           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
   1736           break;
   1737         case MVT::i16:
   1738           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
   1739           break;
   1740         case MVT::i32:
   1741           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
   1742           break;
   1743         case MVT::f32:
   1744           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
   1745           break;
   1746         }
   1747         break;
   1748       }
   1749     }
   1750 
   1751     SDValue Ops[] = { Base, Offset, Chain };
   1752 
   1753     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1754   } else {
   1755     if (TM.is64Bit()) {
   1756       switch (N->getOpcode()) {
   1757       default:
   1758         return nullptr;
   1759       case ISD::LOAD:
   1760       case ISD::INTRINSIC_W_CHAIN:
   1761         if (IsLDG) {
   1762           switch (EltVT.getSimpleVT().SimpleTy) {
   1763           default:
   1764             return nullptr;
   1765           case MVT::i8:
   1766             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
   1767             break;
   1768           case MVT::i16:
   1769             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
   1770             break;
   1771           case MVT::i32:
   1772             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
   1773             break;
   1774           case MVT::i64:
   1775             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
   1776             break;
   1777           case MVT::f32:
   1778             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
   1779             break;
   1780           case MVT::f64:
   1781             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
   1782             break;
   1783           }
   1784         } else {
   1785           switch (EltVT.getSimpleVT().SimpleTy) {
   1786           default:
   1787             return nullptr;
   1788           case MVT::i8:
   1789             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
   1790             break;
   1791           case MVT::i16:
   1792             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
   1793             break;
   1794           case MVT::i32:
   1795             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
   1796             break;
   1797           case MVT::i64:
   1798             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
   1799             break;
   1800           case MVT::f32:
   1801             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
   1802             break;
   1803           case MVT::f64:
   1804             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
   1805             break;
   1806           }
   1807         }
   1808         break;
   1809       case NVPTXISD::LoadV2:
   1810       case NVPTXISD::LDGV2:
   1811         switch (EltVT.getSimpleVT().SimpleTy) {
   1812         default:
   1813           return nullptr;
   1814         case MVT::i8:
   1815           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
   1816           break;
   1817         case MVT::i16:
   1818           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
   1819           break;
   1820         case MVT::i32:
   1821           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
   1822           break;
   1823         case MVT::i64:
   1824           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
   1825           break;
   1826         case MVT::f32:
   1827           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
   1828           break;
   1829         case MVT::f64:
   1830           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
   1831           break;
   1832         }
   1833         break;
   1834       case NVPTXISD::LDUV2:
   1835         switch (EltVT.getSimpleVT().SimpleTy) {
   1836         default:
   1837           return nullptr;
   1838         case MVT::i8:
   1839           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
   1840           break;
   1841         case MVT::i16:
   1842           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
   1843           break;
   1844         case MVT::i32:
   1845           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
   1846           break;
   1847         case MVT::i64:
   1848           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
   1849           break;
   1850         case MVT::f32:
   1851           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
   1852           break;
   1853         case MVT::f64:
   1854           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
   1855           break;
   1856         }
   1857         break;
   1858       case NVPTXISD::LoadV4:
   1859       case NVPTXISD::LDGV4:
   1860         switch (EltVT.getSimpleVT().SimpleTy) {
   1861         default:
   1862           return nullptr;
   1863         case MVT::i8:
   1864           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
   1865           break;
   1866         case MVT::i16:
   1867           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
   1868           break;
   1869         case MVT::i32:
   1870           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
   1871           break;
   1872         case MVT::f32:
   1873           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
   1874           break;
   1875         }
   1876         break;
   1877       case NVPTXISD::LDUV4:
   1878         switch (EltVT.getSimpleVT().SimpleTy) {
   1879         default:
   1880           return nullptr;
   1881         case MVT::i8:
   1882           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
   1883           break;
   1884         case MVT::i16:
   1885           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
   1886           break;
   1887         case MVT::i32:
   1888           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
   1889           break;
   1890         case MVT::f32:
   1891           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
   1892           break;
   1893         }
   1894         break;
   1895       }
   1896     } else {
   1897       switch (N->getOpcode()) {
   1898       default:
   1899         return nullptr;
   1900       case ISD::LOAD:
   1901       case ISD::INTRINSIC_W_CHAIN:
   1902         if (IsLDG) {
   1903           switch (EltVT.getSimpleVT().SimpleTy) {
   1904           default:
   1905             return nullptr;
   1906           case MVT::i8:
   1907             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
   1908             break;
   1909           case MVT::i16:
   1910             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
   1911             break;
   1912           case MVT::i32:
   1913             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
   1914             break;
   1915           case MVT::i64:
   1916             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
   1917             break;
   1918           case MVT::f32:
   1919             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
   1920             break;
   1921           case MVT::f64:
   1922             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
   1923             break;
   1924           }
   1925         } else {
   1926           switch (EltVT.getSimpleVT().SimpleTy) {
   1927           default:
   1928             return nullptr;
   1929           case MVT::i8:
   1930             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
   1931             break;
   1932           case MVT::i16:
   1933             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
   1934             break;
   1935           case MVT::i32:
   1936             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
   1937             break;
   1938           case MVT::i64:
   1939             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
   1940             break;
   1941           case MVT::f32:
   1942             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
   1943             break;
   1944           case MVT::f64:
   1945             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
   1946             break;
   1947           }
   1948         }
   1949         break;
   1950       case NVPTXISD::LoadV2:
   1951       case NVPTXISD::LDGV2:
   1952         switch (EltVT.getSimpleVT().SimpleTy) {
   1953         default:
   1954           return nullptr;
   1955         case MVT::i8:
   1956           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
   1957           break;
   1958         case MVT::i16:
   1959           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
   1960           break;
   1961         case MVT::i32:
   1962           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
   1963           break;
   1964         case MVT::i64:
   1965           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
   1966           break;
   1967         case MVT::f32:
   1968           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
   1969           break;
   1970         case MVT::f64:
   1971           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
   1972           break;
   1973         }
   1974         break;
   1975       case NVPTXISD::LDUV2:
   1976         switch (EltVT.getSimpleVT().SimpleTy) {
   1977         default:
   1978           return nullptr;
   1979         case MVT::i8:
   1980           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
   1981           break;
   1982         case MVT::i16:
   1983           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
   1984           break;
   1985         case MVT::i32:
   1986           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
   1987           break;
   1988         case MVT::i64:
   1989           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
   1990           break;
   1991         case MVT::f32:
   1992           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
   1993           break;
   1994         case MVT::f64:
   1995           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
   1996           break;
   1997         }
   1998         break;
   1999       case NVPTXISD::LoadV4:
   2000       case NVPTXISD::LDGV4:
   2001         switch (EltVT.getSimpleVT().SimpleTy) {
   2002         default:
   2003           return nullptr;
   2004         case MVT::i8:
   2005           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
   2006           break;
   2007         case MVT::i16:
   2008           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
   2009           break;
   2010         case MVT::i32:
   2011           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
   2012           break;
   2013         case MVT::f32:
   2014           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
   2015           break;
   2016         }
   2017         break;
   2018       case NVPTXISD::LDUV4:
   2019         switch (EltVT.getSimpleVT().SimpleTy) {
   2020         default:
   2021           return nullptr;
   2022         case MVT::i8:
   2023           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
   2024           break;
   2025         case MVT::i16:
   2026           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
   2027           break;
   2028         case MVT::i32:
   2029           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
   2030           break;
   2031         case MVT::f32:
   2032           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
   2033           break;
   2034         }
   2035         break;
   2036       }
   2037     }
   2038 
   2039     SDValue Ops[] = { Op1, Chain };
   2040     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   2041   }
   2042 
   2043   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2044   MemRefs0[0] = Mem->getMemOperand();
   2045   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2046 
   2047   return LD;
   2048 }
   2049 
   2050 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   2051   SDLoc dl(N);
   2052   StoreSDNode *ST = cast<StoreSDNode>(N);
   2053   EVT StoreVT = ST->getMemoryVT();
   2054   SDNode *NVPTXST = nullptr;
   2055 
   2056   // do not support pre/post inc/dec
   2057   if (ST->isIndexed())
   2058     return nullptr;
   2059 
   2060   if (!StoreVT.isSimple())
   2061     return nullptr;
   2062 
   2063   // Address Space Setting
   2064   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
   2065 
   2066   // Volatile Setting
   2067   // - .volatile is only availalble for .global and .shared
   2068   bool isVolatile = ST->isVolatile();
   2069   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2070       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2071       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2072     isVolatile = false;
   2073 
   2074   // Vector Setting
   2075   MVT SimpleVT = StoreVT.getSimpleVT();
   2076   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
   2077   if (SimpleVT.isVector()) {
   2078     unsigned num = SimpleVT.getVectorNumElements();
   2079     if (num == 2)
   2080       vecType = NVPTX::PTXLdStInstCode::V2;
   2081     else if (num == 4)
   2082       vecType = NVPTX::PTXLdStInstCode::V4;
   2083     else
   2084       return nullptr;
   2085   }
   2086 
   2087   // Type Setting: toType + toTypeWidth
   2088   // - for integer type, always use 'u'
   2089   //
   2090   MVT ScalarVT = SimpleVT.getScalarType();
   2091   unsigned toTypeWidth = ScalarVT.getSizeInBits();
   2092   unsigned int toType;
   2093   if (ScalarVT.isFloatingPoint())
   2094     toType = NVPTX::PTXLdStInstCode::Float;
   2095   else
   2096     toType = NVPTX::PTXLdStInstCode::Unsigned;
   2097 
   2098   // Create the machine instruction DAG
   2099   SDValue Chain = N->getOperand(0);
   2100   SDValue N1 = N->getOperand(1);
   2101   SDValue N2 = N->getOperand(2);
   2102   SDValue Addr;
   2103   SDValue Offset, Base;
   2104   unsigned Opcode;
   2105   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
   2106 
   2107   if (SelectDirectAddr(N2, Addr)) {
   2108     switch (SourceVT) {
   2109     case MVT::i8:
   2110       Opcode = NVPTX::ST_i8_avar;
   2111       break;
   2112     case MVT::i16:
   2113       Opcode = NVPTX::ST_i16_avar;
   2114       break;
   2115     case MVT::i32:
   2116       Opcode = NVPTX::ST_i32_avar;
   2117       break;
   2118     case MVT::i64:
   2119       Opcode = NVPTX::ST_i64_avar;
   2120       break;
   2121     case MVT::f32:
   2122       Opcode = NVPTX::ST_f32_avar;
   2123       break;
   2124     case MVT::f64:
   2125       Opcode = NVPTX::ST_f64_avar;
   2126       break;
   2127     default:
   2128       return nullptr;
   2129     }
   2130     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2131                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2132                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
   2133                       Chain };
   2134     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2135   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2136                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2137     switch (SourceVT) {
   2138     case MVT::i8:
   2139       Opcode = NVPTX::ST_i8_asi;
   2140       break;
   2141     case MVT::i16:
   2142       Opcode = NVPTX::ST_i16_asi;
   2143       break;
   2144     case MVT::i32:
   2145       Opcode = NVPTX::ST_i32_asi;
   2146       break;
   2147     case MVT::i64:
   2148       Opcode = NVPTX::ST_i64_asi;
   2149       break;
   2150     case MVT::f32:
   2151       Opcode = NVPTX::ST_f32_asi;
   2152       break;
   2153     case MVT::f64:
   2154       Opcode = NVPTX::ST_f64_asi;
   2155       break;
   2156     default:
   2157       return nullptr;
   2158     }
   2159     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2160                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2161                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
   2162                       Offset, Chain };
   2163     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2164   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2165                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2166     if (TM.is64Bit()) {
   2167       switch (SourceVT) {
   2168       case MVT::i8:
   2169         Opcode = NVPTX::ST_i8_ari_64;
   2170         break;
   2171       case MVT::i16:
   2172         Opcode = NVPTX::ST_i16_ari_64;
   2173         break;
   2174       case MVT::i32:
   2175         Opcode = NVPTX::ST_i32_ari_64;
   2176         break;
   2177       case MVT::i64:
   2178         Opcode = NVPTX::ST_i64_ari_64;
   2179         break;
   2180       case MVT::f32:
   2181         Opcode = NVPTX::ST_f32_ari_64;
   2182         break;
   2183       case MVT::f64:
   2184         Opcode = NVPTX::ST_f64_ari_64;
   2185         break;
   2186       default:
   2187         return nullptr;
   2188       }
   2189     } else {
   2190       switch (SourceVT) {
   2191       case MVT::i8:
   2192         Opcode = NVPTX::ST_i8_ari;
   2193         break;
   2194       case MVT::i16:
   2195         Opcode = NVPTX::ST_i16_ari;
   2196         break;
   2197       case MVT::i32:
   2198         Opcode = NVPTX::ST_i32_ari;
   2199         break;
   2200       case MVT::i64:
   2201         Opcode = NVPTX::ST_i64_ari;
   2202         break;
   2203       case MVT::f32:
   2204         Opcode = NVPTX::ST_f32_ari;
   2205         break;
   2206       case MVT::f64:
   2207         Opcode = NVPTX::ST_f64_ari;
   2208         break;
   2209       default:
   2210         return nullptr;
   2211       }
   2212     }
   2213     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2214                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2215                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
   2216                       Offset, Chain };
   2217     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2218   } else {
   2219     if (TM.is64Bit()) {
   2220       switch (SourceVT) {
   2221       case MVT::i8:
   2222         Opcode = NVPTX::ST_i8_areg_64;
   2223         break;
   2224       case MVT::i16:
   2225         Opcode = NVPTX::ST_i16_areg_64;
   2226         break;
   2227       case MVT::i32:
   2228         Opcode = NVPTX::ST_i32_areg_64;
   2229         break;
   2230       case MVT::i64:
   2231         Opcode = NVPTX::ST_i64_areg_64;
   2232         break;
   2233       case MVT::f32:
   2234         Opcode = NVPTX::ST_f32_areg_64;
   2235         break;
   2236       case MVT::f64:
   2237         Opcode = NVPTX::ST_f64_areg_64;
   2238         break;
   2239       default:
   2240         return nullptr;
   2241       }
   2242     } else {
   2243       switch (SourceVT) {
   2244       case MVT::i8:
   2245         Opcode = NVPTX::ST_i8_areg;
   2246         break;
   2247       case MVT::i16:
   2248         Opcode = NVPTX::ST_i16_areg;
   2249         break;
   2250       case MVT::i32:
   2251         Opcode = NVPTX::ST_i32_areg;
   2252         break;
   2253       case MVT::i64:
   2254         Opcode = NVPTX::ST_i64_areg;
   2255         break;
   2256       case MVT::f32:
   2257         Opcode = NVPTX::ST_f32_areg;
   2258         break;
   2259       case MVT::f64:
   2260         Opcode = NVPTX::ST_f64_areg;
   2261         break;
   2262       default:
   2263         return nullptr;
   2264       }
   2265     }
   2266     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2267                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2268                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
   2269                       Chain };
   2270     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2271   }
   2272 
   2273   if (NVPTXST) {
   2274     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2275     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2276     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2277   }
   2278 
   2279   return NVPTXST;
   2280 }
   2281 
   2282 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   2283   SDValue Chain = N->getOperand(0);
   2284   SDValue Op1 = N->getOperand(1);
   2285   SDValue Addr, Offset, Base;
   2286   unsigned Opcode;
   2287   SDLoc DL(N);
   2288   SDNode *ST;
   2289   EVT EltVT = Op1.getValueType();
   2290   MemSDNode *MemSD = cast<MemSDNode>(N);
   2291   EVT StoreVT = MemSD->getMemoryVT();
   2292 
   2293   // Address Space Setting
   2294   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
   2295 
   2296   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
   2297     report_fatal_error("Cannot store to pointer that points to constant "
   2298                        "memory space");
   2299   }
   2300 
   2301   // Volatile Setting
   2302   // - .volatile is only available for .global and .shared
   2303   bool IsVolatile = MemSD->isVolatile();
   2304   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2305       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2306       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2307     IsVolatile = false;
   2308 
   2309   // Type Setting: toType + toTypeWidth
   2310   // - for integer type, always use 'u'
   2311   assert(StoreVT.isSimple() && "Store value is not simple");
   2312   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
   2313   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   2314   unsigned ToType;
   2315   if (ScalarVT.isFloatingPoint())
   2316     ToType = NVPTX::PTXLdStInstCode::Float;
   2317   else
   2318     ToType = NVPTX::PTXLdStInstCode::Unsigned;
   2319 
   2320   SmallVector<SDValue, 12> StOps;
   2321   SDValue N2;
   2322   unsigned VecType;
   2323 
   2324   switch (N->getOpcode()) {
   2325   case NVPTXISD::StoreV2:
   2326     VecType = NVPTX::PTXLdStInstCode::V2;
   2327     StOps.push_back(N->getOperand(1));
   2328     StOps.push_back(N->getOperand(2));
   2329     N2 = N->getOperand(3);
   2330     break;
   2331   case NVPTXISD::StoreV4:
   2332     VecType = NVPTX::PTXLdStInstCode::V4;
   2333     StOps.push_back(N->getOperand(1));
   2334     StOps.push_back(N->getOperand(2));
   2335     StOps.push_back(N->getOperand(3));
   2336     StOps.push_back(N->getOperand(4));
   2337     N2 = N->getOperand(5);
   2338     break;
   2339   default:
   2340     return nullptr;
   2341   }
   2342 
   2343   StOps.push_back(getI32Imm(IsVolatile, DL));
   2344   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
   2345   StOps.push_back(getI32Imm(VecType, DL));
   2346   StOps.push_back(getI32Imm(ToType, DL));
   2347   StOps.push_back(getI32Imm(ToTypeWidth, DL));
   2348 
   2349   if (SelectDirectAddr(N2, Addr)) {
   2350     switch (N->getOpcode()) {
   2351     default:
   2352       return nullptr;
   2353     case NVPTXISD::StoreV2:
   2354       switch (EltVT.getSimpleVT().SimpleTy) {
   2355       default:
   2356         return nullptr;
   2357       case MVT::i8:
   2358         Opcode = NVPTX::STV_i8_v2_avar;
   2359         break;
   2360       case MVT::i16:
   2361         Opcode = NVPTX::STV_i16_v2_avar;
   2362         break;
   2363       case MVT::i32:
   2364         Opcode = NVPTX::STV_i32_v2_avar;
   2365         break;
   2366       case MVT::i64:
   2367         Opcode = NVPTX::STV_i64_v2_avar;
   2368         break;
   2369       case MVT::f32:
   2370         Opcode = NVPTX::STV_f32_v2_avar;
   2371         break;
   2372       case MVT::f64:
   2373         Opcode = NVPTX::STV_f64_v2_avar;
   2374         break;
   2375       }
   2376       break;
   2377     case NVPTXISD::StoreV4:
   2378       switch (EltVT.getSimpleVT().SimpleTy) {
   2379       default:
   2380         return nullptr;
   2381       case MVT::i8:
   2382         Opcode = NVPTX::STV_i8_v4_avar;
   2383         break;
   2384       case MVT::i16:
   2385         Opcode = NVPTX::STV_i16_v4_avar;
   2386         break;
   2387       case MVT::i32:
   2388         Opcode = NVPTX::STV_i32_v4_avar;
   2389         break;
   2390       case MVT::f32:
   2391         Opcode = NVPTX::STV_f32_v4_avar;
   2392         break;
   2393       }
   2394       break;
   2395     }
   2396     StOps.push_back(Addr);
   2397   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2398                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2399     switch (N->getOpcode()) {
   2400     default:
   2401       return nullptr;
   2402     case NVPTXISD::StoreV2:
   2403       switch (EltVT.getSimpleVT().SimpleTy) {
   2404       default:
   2405         return nullptr;
   2406       case MVT::i8:
   2407         Opcode = NVPTX::STV_i8_v2_asi;
   2408         break;
   2409       case MVT::i16:
   2410         Opcode = NVPTX::STV_i16_v2_asi;
   2411         break;
   2412       case MVT::i32:
   2413         Opcode = NVPTX::STV_i32_v2_asi;
   2414         break;
   2415       case MVT::i64:
   2416         Opcode = NVPTX::STV_i64_v2_asi;
   2417         break;
   2418       case MVT::f32:
   2419         Opcode = NVPTX::STV_f32_v2_asi;
   2420         break;
   2421       case MVT::f64:
   2422         Opcode = NVPTX::STV_f64_v2_asi;
   2423         break;
   2424       }
   2425       break;
   2426     case NVPTXISD::StoreV4:
   2427       switch (EltVT.getSimpleVT().SimpleTy) {
   2428       default:
   2429         return nullptr;
   2430       case MVT::i8:
   2431         Opcode = NVPTX::STV_i8_v4_asi;
   2432         break;
   2433       case MVT::i16:
   2434         Opcode = NVPTX::STV_i16_v4_asi;
   2435         break;
   2436       case MVT::i32:
   2437         Opcode = NVPTX::STV_i32_v4_asi;
   2438         break;
   2439       case MVT::f32:
   2440         Opcode = NVPTX::STV_f32_v4_asi;
   2441         break;
   2442       }
   2443       break;
   2444     }
   2445     StOps.push_back(Base);
   2446     StOps.push_back(Offset);
   2447   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2448                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2449     if (TM.is64Bit()) {
   2450       switch (N->getOpcode()) {
   2451       default:
   2452         return nullptr;
   2453       case NVPTXISD::StoreV2:
   2454         switch (EltVT.getSimpleVT().SimpleTy) {
   2455         default:
   2456           return nullptr;
   2457         case MVT::i8:
   2458           Opcode = NVPTX::STV_i8_v2_ari_64;
   2459           break;
   2460         case MVT::i16:
   2461           Opcode = NVPTX::STV_i16_v2_ari_64;
   2462           break;
   2463         case MVT::i32:
   2464           Opcode = NVPTX::STV_i32_v2_ari_64;
   2465           break;
   2466         case MVT::i64:
   2467           Opcode = NVPTX::STV_i64_v2_ari_64;
   2468           break;
   2469         case MVT::f32:
   2470           Opcode = NVPTX::STV_f32_v2_ari_64;
   2471           break;
   2472         case MVT::f64:
   2473           Opcode = NVPTX::STV_f64_v2_ari_64;
   2474           break;
   2475         }
   2476         break;
   2477       case NVPTXISD::StoreV4:
   2478         switch (EltVT.getSimpleVT().SimpleTy) {
   2479         default:
   2480           return nullptr;
   2481         case MVT::i8:
   2482           Opcode = NVPTX::STV_i8_v4_ari_64;
   2483           break;
   2484         case MVT::i16:
   2485           Opcode = NVPTX::STV_i16_v4_ari_64;
   2486           break;
   2487         case MVT::i32:
   2488           Opcode = NVPTX::STV_i32_v4_ari_64;
   2489           break;
   2490         case MVT::f32:
   2491           Opcode = NVPTX::STV_f32_v4_ari_64;
   2492           break;
   2493         }
   2494         break;
   2495       }
   2496     } else {
   2497       switch (N->getOpcode()) {
   2498       default:
   2499         return nullptr;
   2500       case NVPTXISD::StoreV2:
   2501         switch (EltVT.getSimpleVT().SimpleTy) {
   2502         default:
   2503           return nullptr;
   2504         case MVT::i8:
   2505           Opcode = NVPTX::STV_i8_v2_ari;
   2506           break;
   2507         case MVT::i16:
   2508           Opcode = NVPTX::STV_i16_v2_ari;
   2509           break;
   2510         case MVT::i32:
   2511           Opcode = NVPTX::STV_i32_v2_ari;
   2512           break;
   2513         case MVT::i64:
   2514           Opcode = NVPTX::STV_i64_v2_ari;
   2515           break;
   2516         case MVT::f32:
   2517           Opcode = NVPTX::STV_f32_v2_ari;
   2518           break;
   2519         case MVT::f64:
   2520           Opcode = NVPTX::STV_f64_v2_ari;
   2521           break;
   2522         }
   2523         break;
   2524       case NVPTXISD::StoreV4:
   2525         switch (EltVT.getSimpleVT().SimpleTy) {
   2526         default:
   2527           return nullptr;
   2528         case MVT::i8:
   2529           Opcode = NVPTX::STV_i8_v4_ari;
   2530           break;
   2531         case MVT::i16:
   2532           Opcode = NVPTX::STV_i16_v4_ari;
   2533           break;
   2534         case MVT::i32:
   2535           Opcode = NVPTX::STV_i32_v4_ari;
   2536           break;
   2537         case MVT::f32:
   2538           Opcode = NVPTX::STV_f32_v4_ari;
   2539           break;
   2540         }
   2541         break;
   2542       }
   2543     }
   2544     StOps.push_back(Base);
   2545     StOps.push_back(Offset);
   2546   } else {
   2547     if (TM.is64Bit()) {
   2548       switch (N->getOpcode()) {
   2549       default:
   2550         return nullptr;
   2551       case NVPTXISD::StoreV2:
   2552         switch (EltVT.getSimpleVT().SimpleTy) {
   2553         default:
   2554           return nullptr;
   2555         case MVT::i8:
   2556           Opcode = NVPTX::STV_i8_v2_areg_64;
   2557           break;
   2558         case MVT::i16:
   2559           Opcode = NVPTX::STV_i16_v2_areg_64;
   2560           break;
   2561         case MVT::i32:
   2562           Opcode = NVPTX::STV_i32_v2_areg_64;
   2563           break;
   2564         case MVT::i64:
   2565           Opcode = NVPTX::STV_i64_v2_areg_64;
   2566           break;
   2567         case MVT::f32:
   2568           Opcode = NVPTX::STV_f32_v2_areg_64;
   2569           break;
   2570         case MVT::f64:
   2571           Opcode = NVPTX::STV_f64_v2_areg_64;
   2572           break;
   2573         }
   2574         break;
   2575       case NVPTXISD::StoreV4:
   2576         switch (EltVT.getSimpleVT().SimpleTy) {
   2577         default:
   2578           return nullptr;
   2579         case MVT::i8:
   2580           Opcode = NVPTX::STV_i8_v4_areg_64;
   2581           break;
   2582         case MVT::i16:
   2583           Opcode = NVPTX::STV_i16_v4_areg_64;
   2584           break;
   2585         case MVT::i32:
   2586           Opcode = NVPTX::STV_i32_v4_areg_64;
   2587           break;
   2588         case MVT::f32:
   2589           Opcode = NVPTX::STV_f32_v4_areg_64;
   2590           break;
   2591         }
   2592         break;
   2593       }
   2594     } else {
   2595       switch (N->getOpcode()) {
   2596       default:
   2597         return nullptr;
   2598       case NVPTXISD::StoreV2:
   2599         switch (EltVT.getSimpleVT().SimpleTy) {
   2600         default:
   2601           return nullptr;
   2602         case MVT::i8:
   2603           Opcode = NVPTX::STV_i8_v2_areg;
   2604           break;
   2605         case MVT::i16:
   2606           Opcode = NVPTX::STV_i16_v2_areg;
   2607           break;
   2608         case MVT::i32:
   2609           Opcode = NVPTX::STV_i32_v2_areg;
   2610           break;
   2611         case MVT::i64:
   2612           Opcode = NVPTX::STV_i64_v2_areg;
   2613           break;
   2614         case MVT::f32:
   2615           Opcode = NVPTX::STV_f32_v2_areg;
   2616           break;
   2617         case MVT::f64:
   2618           Opcode = NVPTX::STV_f64_v2_areg;
   2619           break;
   2620         }
   2621         break;
   2622       case NVPTXISD::StoreV4:
   2623         switch (EltVT.getSimpleVT().SimpleTy) {
   2624         default:
   2625           return nullptr;
   2626         case MVT::i8:
   2627           Opcode = NVPTX::STV_i8_v4_areg;
   2628           break;
   2629         case MVT::i16:
   2630           Opcode = NVPTX::STV_i16_v4_areg;
   2631           break;
   2632         case MVT::i32:
   2633           Opcode = NVPTX::STV_i32_v4_areg;
   2634           break;
   2635         case MVT::f32:
   2636           Opcode = NVPTX::STV_f32_v4_areg;
   2637           break;
   2638         }
   2639         break;
   2640       }
   2641     }
   2642     StOps.push_back(N2);
   2643   }
   2644 
   2645   StOps.push_back(Chain);
   2646 
   2647   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
   2648 
   2649   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2650   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2651   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2652 
   2653   return ST;
   2654 }
   2655 
   2656 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
   2657   SDValue Chain = Node->getOperand(0);
   2658   SDValue Offset = Node->getOperand(2);
   2659   SDValue Flag = Node->getOperand(3);
   2660   SDLoc DL(Node);
   2661   MemSDNode *Mem = cast<MemSDNode>(Node);
   2662 
   2663   unsigned VecSize;
   2664   switch (Node->getOpcode()) {
   2665   default:
   2666     return nullptr;
   2667   case NVPTXISD::LoadParam:
   2668     VecSize = 1;
   2669     break;
   2670   case NVPTXISD::LoadParamV2:
   2671     VecSize = 2;
   2672     break;
   2673   case NVPTXISD::LoadParamV4:
   2674     VecSize = 4;
   2675     break;
   2676   }
   2677 
   2678   EVT EltVT = Node->getValueType(0);
   2679   EVT MemVT = Mem->getMemoryVT();
   2680 
   2681   unsigned Opc = 0;
   2682 
   2683   switch (VecSize) {
   2684   default:
   2685     return nullptr;
   2686   case 1:
   2687     switch (MemVT.getSimpleVT().SimpleTy) {
   2688     default:
   2689       return nullptr;
   2690     case MVT::i1:
   2691       Opc = NVPTX::LoadParamMemI8;
   2692       break;
   2693     case MVT::i8:
   2694       Opc = NVPTX::LoadParamMemI8;
   2695       break;
   2696     case MVT::i16:
   2697       Opc = NVPTX::LoadParamMemI16;
   2698       break;
   2699     case MVT::i32:
   2700       Opc = NVPTX::LoadParamMemI32;
   2701       break;
   2702     case MVT::i64:
   2703       Opc = NVPTX::LoadParamMemI64;
   2704       break;
   2705     case MVT::f32:
   2706       Opc = NVPTX::LoadParamMemF32;
   2707       break;
   2708     case MVT::f64:
   2709       Opc = NVPTX::LoadParamMemF64;
   2710       break;
   2711     }
   2712     break;
   2713   case 2:
   2714     switch (MemVT.getSimpleVT().SimpleTy) {
   2715     default:
   2716       return nullptr;
   2717     case MVT::i1:
   2718       Opc = NVPTX::LoadParamMemV2I8;
   2719       break;
   2720     case MVT::i8:
   2721       Opc = NVPTX::LoadParamMemV2I8;
   2722       break;
   2723     case MVT::i16:
   2724       Opc = NVPTX::LoadParamMemV2I16;
   2725       break;
   2726     case MVT::i32:
   2727       Opc = NVPTX::LoadParamMemV2I32;
   2728       break;
   2729     case MVT::i64:
   2730       Opc = NVPTX::LoadParamMemV2I64;
   2731       break;
   2732     case MVT::f32:
   2733       Opc = NVPTX::LoadParamMemV2F32;
   2734       break;
   2735     case MVT::f64:
   2736       Opc = NVPTX::LoadParamMemV2F64;
   2737       break;
   2738     }
   2739     break;
   2740   case 4:
   2741     switch (MemVT.getSimpleVT().SimpleTy) {
   2742     default:
   2743       return nullptr;
   2744     case MVT::i1:
   2745       Opc = NVPTX::LoadParamMemV4I8;
   2746       break;
   2747     case MVT::i8:
   2748       Opc = NVPTX::LoadParamMemV4I8;
   2749       break;
   2750     case MVT::i16:
   2751       Opc = NVPTX::LoadParamMemV4I16;
   2752       break;
   2753     case MVT::i32:
   2754       Opc = NVPTX::LoadParamMemV4I32;
   2755       break;
   2756     case MVT::f32:
   2757       Opc = NVPTX::LoadParamMemV4F32;
   2758       break;
   2759     }
   2760     break;
   2761   }
   2762 
   2763   SDVTList VTs;
   2764   if (VecSize == 1) {
   2765     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
   2766   } else if (VecSize == 2) {
   2767     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
   2768   } else {
   2769     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
   2770     VTs = CurDAG->getVTList(EVTs);
   2771   }
   2772 
   2773   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2774 
   2775   SmallVector<SDValue, 2> Ops;
   2776   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   2777   Ops.push_back(Chain);
   2778   Ops.push_back(Flag);
   2779 
   2780   return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
   2781 }
   2782 
   2783 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
   2784   SDLoc DL(N);
   2785   SDValue Chain = N->getOperand(0);
   2786   SDValue Offset = N->getOperand(1);
   2787   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2788   MemSDNode *Mem = cast<MemSDNode>(N);
   2789 
   2790   // How many elements do we have?
   2791   unsigned NumElts = 1;
   2792   switch (N->getOpcode()) {
   2793   default:
   2794     return nullptr;
   2795   case NVPTXISD::StoreRetval:
   2796     NumElts = 1;
   2797     break;
   2798   case NVPTXISD::StoreRetvalV2:
   2799     NumElts = 2;
   2800     break;
   2801   case NVPTXISD::StoreRetvalV4:
   2802     NumElts = 4;
   2803     break;
   2804   }
   2805 
   2806   // Build vector of operands
   2807   SmallVector<SDValue, 6> Ops;
   2808   for (unsigned i = 0; i < NumElts; ++i)
   2809     Ops.push_back(N->getOperand(i + 2));
   2810   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   2811   Ops.push_back(Chain);
   2812 
   2813   // Determine target opcode
   2814   // If we have an i1, use an 8-bit store. The lowering code in
   2815   // NVPTXISelLowering will have already emitted an upcast.
   2816   unsigned Opcode = 0;
   2817   switch (NumElts) {
   2818   default:
   2819     return nullptr;
   2820   case 1:
   2821     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2822     default:
   2823       return nullptr;
   2824     case MVT::i1:
   2825       Opcode = NVPTX::StoreRetvalI8;
   2826       break;
   2827     case MVT::i8:
   2828       Opcode = NVPTX::StoreRetvalI8;
   2829       break;
   2830     case MVT::i16:
   2831       Opcode = NVPTX::StoreRetvalI16;
   2832       break;
   2833     case MVT::i32:
   2834       Opcode = NVPTX::StoreRetvalI32;
   2835       break;
   2836     case MVT::i64:
   2837       Opcode = NVPTX::StoreRetvalI64;
   2838       break;
   2839     case MVT::f32:
   2840       Opcode = NVPTX::StoreRetvalF32;
   2841       break;
   2842     case MVT::f64:
   2843       Opcode = NVPTX::StoreRetvalF64;
   2844       break;
   2845     }
   2846     break;
   2847   case 2:
   2848     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2849     default:
   2850       return nullptr;
   2851     case MVT::i1:
   2852       Opcode = NVPTX::StoreRetvalV2I8;
   2853       break;
   2854     case MVT::i8:
   2855       Opcode = NVPTX::StoreRetvalV2I8;
   2856       break;
   2857     case MVT::i16:
   2858       Opcode = NVPTX::StoreRetvalV2I16;
   2859       break;
   2860     case MVT::i32:
   2861       Opcode = NVPTX::StoreRetvalV2I32;
   2862       break;
   2863     case MVT::i64:
   2864       Opcode = NVPTX::StoreRetvalV2I64;
   2865       break;
   2866     case MVT::f32:
   2867       Opcode = NVPTX::StoreRetvalV2F32;
   2868       break;
   2869     case MVT::f64:
   2870       Opcode = NVPTX::StoreRetvalV2F64;
   2871       break;
   2872     }
   2873     break;
   2874   case 4:
   2875     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2876     default:
   2877       return nullptr;
   2878     case MVT::i1:
   2879       Opcode = NVPTX::StoreRetvalV4I8;
   2880       break;
   2881     case MVT::i8:
   2882       Opcode = NVPTX::StoreRetvalV4I8;
   2883       break;
   2884     case MVT::i16:
   2885       Opcode = NVPTX::StoreRetvalV4I16;
   2886       break;
   2887     case MVT::i32:
   2888       Opcode = NVPTX::StoreRetvalV4I32;
   2889       break;
   2890     case MVT::f32:
   2891       Opcode = NVPTX::StoreRetvalV4F32;
   2892       break;
   2893     }
   2894     break;
   2895   }
   2896 
   2897   SDNode *Ret =
   2898       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   2899   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2900   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2901   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2902 
   2903   return Ret;
   2904 }
   2905 
   2906 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
   2907   SDLoc DL(N);
   2908   SDValue Chain = N->getOperand(0);
   2909   SDValue Param = N->getOperand(1);
   2910   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
   2911   SDValue Offset = N->getOperand(2);
   2912   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2913   MemSDNode *Mem = cast<MemSDNode>(N);
   2914   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
   2915 
   2916   // How many elements do we have?
   2917   unsigned NumElts = 1;
   2918   switch (N->getOpcode()) {
   2919   default:
   2920     return nullptr;
   2921   case NVPTXISD::StoreParamU32:
   2922   case NVPTXISD::StoreParamS32:
   2923   case NVPTXISD::StoreParam:
   2924     NumElts = 1;
   2925     break;
   2926   case NVPTXISD::StoreParamV2:
   2927     NumElts = 2;
   2928     break;
   2929   case NVPTXISD::StoreParamV4:
   2930     NumElts = 4;
   2931     break;
   2932   }
   2933 
   2934   // Build vector of operands
   2935   SmallVector<SDValue, 8> Ops;
   2936   for (unsigned i = 0; i < NumElts; ++i)
   2937     Ops.push_back(N->getOperand(i + 3));
   2938   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
   2939   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   2940   Ops.push_back(Chain);
   2941   Ops.push_back(Flag);
   2942 
   2943   // Determine target opcode
   2944   // If we have an i1, use an 8-bit store. The lowering code in
   2945   // NVPTXISelLowering will have already emitted an upcast.
   2946   unsigned Opcode = 0;
   2947   switch (N->getOpcode()) {
   2948   default:
   2949     switch (NumElts) {
   2950     default:
   2951       return nullptr;
   2952     case 1:
   2953       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2954       default:
   2955         return nullptr;
   2956       case MVT::i1:
   2957         Opcode = NVPTX::StoreParamI8;
   2958         break;
   2959       case MVT::i8:
   2960         Opcode = NVPTX::StoreParamI8;
   2961         break;
   2962       case MVT::i16:
   2963         Opcode = NVPTX::StoreParamI16;
   2964         break;
   2965       case MVT::i32:
   2966         Opcode = NVPTX::StoreParamI32;
   2967         break;
   2968       case MVT::i64:
   2969         Opcode = NVPTX::StoreParamI64;
   2970         break;
   2971       case MVT::f32:
   2972         Opcode = NVPTX::StoreParamF32;
   2973         break;
   2974       case MVT::f64:
   2975         Opcode = NVPTX::StoreParamF64;
   2976         break;
   2977       }
   2978       break;
   2979     case 2:
   2980       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2981       default:
   2982         return nullptr;
   2983       case MVT::i1:
   2984         Opcode = NVPTX::StoreParamV2I8;
   2985         break;
   2986       case MVT::i8:
   2987         Opcode = NVPTX::StoreParamV2I8;
   2988         break;
   2989       case MVT::i16:
   2990         Opcode = NVPTX::StoreParamV2I16;
   2991         break;
   2992       case MVT::i32:
   2993         Opcode = NVPTX::StoreParamV2I32;
   2994         break;
   2995       case MVT::i64:
   2996         Opcode = NVPTX::StoreParamV2I64;
   2997         break;
   2998       case MVT::f32:
   2999         Opcode = NVPTX::StoreParamV2F32;
   3000         break;
   3001       case MVT::f64:
   3002         Opcode = NVPTX::StoreParamV2F64;
   3003         break;
   3004       }
   3005       break;
   3006     case 4:
   3007       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   3008       default:
   3009         return nullptr;
   3010       case MVT::i1:
   3011         Opcode = NVPTX::StoreParamV4I8;
   3012         break;
   3013       case MVT::i8:
   3014         Opcode = NVPTX::StoreParamV4I8;
   3015         break;
   3016       case MVT::i16:
   3017         Opcode = NVPTX::StoreParamV4I16;
   3018         break;
   3019       case MVT::i32:
   3020         Opcode = NVPTX::StoreParamV4I32;
   3021         break;
   3022       case MVT::f32:
   3023         Opcode = NVPTX::StoreParamV4F32;
   3024         break;
   3025       }
   3026       break;
   3027     }
   3028     break;
   3029   // Special case: if we have a sign-extend/zero-extend node, insert the
   3030   // conversion instruction first, and use that as the value operand to
   3031   // the selected StoreParam node.
   3032   case NVPTXISD::StoreParamU32: {
   3033     Opcode = NVPTX::StoreParamI32;
   3034     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
   3035                                                 MVT::i32);
   3036     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
   3037                                          MVT::i32, Ops[0], CvtNone);
   3038     Ops[0] = SDValue(Cvt, 0);
   3039     break;
   3040   }
   3041   case NVPTXISD::StoreParamS32: {
   3042     Opcode = NVPTX::StoreParamI32;
   3043     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
   3044                                                 MVT::i32);
   3045     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
   3046                                          MVT::i32, Ops[0], CvtNone);
   3047     Ops[0] = SDValue(Cvt, 0);
   3048     break;
   3049   }
   3050   }
   3051 
   3052   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
   3053   SDNode *Ret =
   3054       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
   3055   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   3056   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   3057   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   3058 
   3059   return Ret;
   3060 }
   3061 
   3062 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
   3063   SDValue Chain = N->getOperand(0);
   3064   SDNode *Ret = nullptr;
   3065   unsigned Opc = 0;
   3066   SmallVector<SDValue, 8> Ops;
   3067 
   3068   switch (N->getOpcode()) {
   3069   default: return nullptr;
   3070   case NVPTXISD::Tex1DFloatS32:
   3071     Opc = NVPTX::TEX_1D_F32_S32;
   3072     break;
   3073   case NVPTXISD::Tex1DFloatFloat:
   3074     Opc = NVPTX::TEX_1D_F32_F32;
   3075     break;
   3076   case NVPTXISD::Tex1DFloatFloatLevel:
   3077     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
   3078     break;
   3079   case NVPTXISD::Tex1DFloatFloatGrad:
   3080     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
   3081     break;
   3082   case NVPTXISD::Tex1DS32S32:
   3083     Opc = NVPTX::TEX_1D_S32_S32;
   3084     break;
   3085   case NVPTXISD::Tex1DS32Float:
   3086     Opc = NVPTX::TEX_1D_S32_F32;
   3087     break;
   3088   case NVPTXISD::Tex1DS32FloatLevel:
   3089     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
   3090     break;
   3091   case NVPTXISD::Tex1DS32FloatGrad:
   3092     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
   3093     break;
   3094   case NVPTXISD::Tex1DU32S32:
   3095     Opc = NVPTX::TEX_1D_U32_S32;
   3096     break;
   3097   case NVPTXISD::Tex1DU32Float:
   3098     Opc = NVPTX::TEX_1D_U32_F32;
   3099     break;
   3100   case NVPTXISD::Tex1DU32FloatLevel:
   3101     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
   3102     break;
   3103   case NVPTXISD::Tex1DU32FloatGrad:
   3104     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
   3105     break;
   3106   case NVPTXISD::Tex1DArrayFloatS32:
   3107     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
   3108     break;
   3109   case NVPTXISD::Tex1DArrayFloatFloat:
   3110     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
   3111     break;
   3112   case NVPTXISD::Tex1DArrayFloatFloatLevel:
   3113     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
   3114     break;
   3115   case NVPTXISD::Tex1DArrayFloatFloatGrad:
   3116     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
   3117     break;
   3118   case NVPTXISD::Tex1DArrayS32S32:
   3119     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
   3120     break;
   3121   case NVPTXISD::Tex1DArrayS32Float:
   3122     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
   3123     break;
   3124   case NVPTXISD::Tex1DArrayS32FloatLevel:
   3125     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
   3126     break;
   3127   case NVPTXISD::Tex1DArrayS32FloatGrad:
   3128     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
   3129     break;
   3130   case NVPTXISD::Tex1DArrayU32S32:
   3131     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
   3132     break;
   3133   case NVPTXISD::Tex1DArrayU32Float:
   3134     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
   3135     break;
   3136   case NVPTXISD::Tex1DArrayU32FloatLevel:
   3137     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
   3138     break;
   3139   case NVPTXISD::Tex1DArrayU32FloatGrad:
   3140     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
   3141     break;
   3142   case NVPTXISD::Tex2DFloatS32:
   3143     Opc = NVPTX::TEX_2D_F32_S32;
   3144     break;
   3145   case NVPTXISD::Tex2DFloatFloat:
   3146     Opc = NVPTX::TEX_2D_F32_F32;
   3147     break;
   3148   case NVPTXISD::Tex2DFloatFloatLevel:
   3149     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
   3150     break;
   3151   case NVPTXISD::Tex2DFloatFloatGrad:
   3152     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
   3153     break;
   3154   case NVPTXISD::Tex2DS32S32:
   3155     Opc = NVPTX::TEX_2D_S32_S32;
   3156     break;
   3157   case NVPTXISD::Tex2DS32Float:
   3158     Opc = NVPTX::TEX_2D_S32_F32;
   3159     break;
   3160   case NVPTXISD::Tex2DS32FloatLevel:
   3161     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
   3162     break;
   3163   case NVPTXISD::Tex2DS32FloatGrad:
   3164     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
   3165     break;
   3166   case NVPTXISD::Tex2DU32S32:
   3167     Opc = NVPTX::TEX_2D_U32_S32;
   3168     break;
   3169   case NVPTXISD::Tex2DU32Float:
   3170     Opc = NVPTX::TEX_2D_U32_F32;
   3171     break;
   3172   case NVPTXISD::Tex2DU32FloatLevel:
   3173     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
   3174     break;
   3175   case NVPTXISD::Tex2DU32FloatGrad:
   3176     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
   3177     break;
   3178   case NVPTXISD::Tex2DArrayFloatS32:
   3179     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
   3180     break;
   3181   case NVPTXISD::Tex2DArrayFloatFloat:
   3182     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
   3183     break;
   3184   case NVPTXISD::Tex2DArrayFloatFloatLevel:
   3185     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
   3186     break;
   3187   case NVPTXISD::Tex2DArrayFloatFloatGrad:
   3188     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
   3189     break;
   3190   case NVPTXISD::Tex2DArrayS32S32:
   3191     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
   3192     break;
   3193   case NVPTXISD::Tex2DArrayS32Float:
   3194     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
   3195     break;
   3196   case NVPTXISD::Tex2DArrayS32FloatLevel:
   3197     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
   3198     break;
   3199   case NVPTXISD::Tex2DArrayS32FloatGrad:
   3200     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
   3201     break;
   3202   case NVPTXISD::Tex2DArrayU32S32:
   3203     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
   3204     break;
   3205   case NVPTXISD::Tex2DArrayU32Float:
   3206     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
   3207     break;
   3208   case NVPTXISD::Tex2DArrayU32FloatLevel:
   3209     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
   3210     break;
   3211   case NVPTXISD::Tex2DArrayU32FloatGrad:
   3212     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
   3213     break;
   3214   case NVPTXISD::Tex3DFloatS32:
   3215     Opc = NVPTX::TEX_3D_F32_S32;
   3216     break;
   3217   case NVPTXISD::Tex3DFloatFloat:
   3218     Opc = NVPTX::TEX_3D_F32_F32;
   3219     break;
   3220   case NVPTXISD::Tex3DFloatFloatLevel:
   3221     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
   3222     break;
   3223   case NVPTXISD::Tex3DFloatFloatGrad:
   3224     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
   3225     break;
   3226   case NVPTXISD::Tex3DS32S32:
   3227     Opc = NVPTX::TEX_3D_S32_S32;
   3228     break;
   3229   case NVPTXISD::Tex3DS32Float:
   3230     Opc = NVPTX::TEX_3D_S32_F32;
   3231     break;
   3232   case NVPTXISD::Tex3DS32FloatLevel:
   3233     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
   3234     break;
   3235   case NVPTXISD::Tex3DS32FloatGrad:
   3236     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
   3237     break;
   3238   case NVPTXISD::Tex3DU32S32:
   3239     Opc = NVPTX::TEX_3D_U32_S32;
   3240     break;
   3241   case NVPTXISD::Tex3DU32Float:
   3242     Opc = NVPTX::TEX_3D_U32_F32;
   3243     break;
   3244   case NVPTXISD::Tex3DU32FloatLevel:
   3245     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
   3246     break;
   3247   case NVPTXISD::Tex3DU32FloatGrad:
   3248     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
   3249     break;
   3250   case NVPTXISD::TexCubeFloatFloat:
   3251     Opc = NVPTX::TEX_CUBE_F32_F32;
   3252     break;
   3253   case NVPTXISD::TexCubeFloatFloatLevel:
   3254     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
   3255     break;
   3256   case NVPTXISD::TexCubeS32Float:
   3257     Opc = NVPTX::TEX_CUBE_S32_F32;
   3258     break;
   3259   case NVPTXISD::TexCubeS32FloatLevel:
   3260     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
   3261     break;
   3262   case NVPTXISD::TexCubeU32Float:
   3263     Opc = NVPTX::TEX_CUBE_U32_F32;
   3264     break;
   3265   case NVPTXISD::TexCubeU32FloatLevel:
   3266     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
   3267     break;
   3268   case NVPTXISD::TexCubeArrayFloatFloat:
   3269     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
   3270     break;
   3271   case NVPTXISD::TexCubeArrayFloatFloatLevel:
   3272     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
   3273     break;
   3274   case NVPTXISD::TexCubeArrayS32Float:
   3275     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
   3276     break;
   3277   case NVPTXISD::TexCubeArrayS32FloatLevel:
   3278     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
   3279     break;
   3280   case NVPTXISD::TexCubeArrayU32Float:
   3281     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
   3282     break;
   3283   case NVPTXISD::TexCubeArrayU32FloatLevel:
   3284     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
   3285     break;
   3286   case NVPTXISD::Tld4R2DFloatFloat:
   3287     Opc = NVPTX::TLD4_R_2D_F32_F32;
   3288     break;
   3289   case NVPTXISD::Tld4G2DFloatFloat:
   3290     Opc = NVPTX::TLD4_G_2D_F32_F32;
   3291     break;
   3292   case NVPTXISD::Tld4B2DFloatFloat:
   3293     Opc = NVPTX::TLD4_B_2D_F32_F32;
   3294     break;
   3295   case NVPTXISD::Tld4A2DFloatFloat:
   3296     Opc = NVPTX::TLD4_A_2D_F32_F32;
   3297     break;
   3298   case NVPTXISD::Tld4R2DS64Float:
   3299     Opc = NVPTX::TLD4_R_2D_S32_F32;
   3300     break;
   3301   case NVPTXISD::Tld4G2DS64Float:
   3302     Opc = NVPTX::TLD4_G_2D_S32_F32;
   3303     break;
   3304   case NVPTXISD::Tld4B2DS64Float:
   3305     Opc = NVPTX::TLD4_B_2D_S32_F32;
   3306     break;
   3307   case NVPTXISD::Tld4A2DS64Float:
   3308     Opc = NVPTX::TLD4_A_2D_S32_F32;
   3309     break;
   3310   case NVPTXISD::Tld4R2DU64Float:
   3311     Opc = NVPTX::TLD4_R_2D_U32_F32;
   3312     break;
   3313   case NVPTXISD::Tld4G2DU64Float:
   3314     Opc = NVPTX::TLD4_G_2D_U32_F32;
   3315     break;
   3316   case NVPTXISD::Tld4B2DU64Float:
   3317     Opc = NVPTX::TLD4_B_2D_U32_F32;
   3318     break;
   3319   case NVPTXISD::Tld4A2DU64Float:
   3320     Opc = NVPTX::TLD4_A_2D_U32_F32;
   3321     break;
   3322   case NVPTXISD::TexUnified1DFloatS32:
   3323     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
   3324     break;
   3325   case NVPTXISD::TexUnified1DFloatFloat:
   3326     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
   3327     break;
   3328   case NVPTXISD::TexUnified1DFloatFloatLevel:
   3329     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
   3330     break;
   3331   case NVPTXISD::TexUnified1DFloatFloatGrad:
   3332     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
   3333     break;
   3334   case NVPTXISD::TexUnified1DS32S32:
   3335     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
   3336     break;
   3337   case NVPTXISD::TexUnified1DS32Float:
   3338     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
   3339     break;
   3340   case NVPTXISD::TexUnified1DS32FloatLevel:
   3341     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
   3342     break;
   3343   case NVPTXISD::TexUnified1DS32FloatGrad:
   3344     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
   3345     break;
   3346   case NVPTXISD::TexUnified1DU32S32:
   3347     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
   3348     break;
   3349   case NVPTXISD::TexUnified1DU32Float:
   3350     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
   3351     break;
   3352   case NVPTXISD::TexUnified1DU32FloatLevel:
   3353     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
   3354     break;
   3355   case NVPTXISD::TexUnified1DU32FloatGrad:
   3356     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
   3357     break;
   3358   case NVPTXISD::TexUnified1DArrayFloatS32:
   3359     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
   3360     break;
   3361   case NVPTXISD::TexUnified1DArrayFloatFloat:
   3362     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
   3363     break;
   3364   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
   3365     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
   3366     break;
   3367   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
   3368     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
   3369     break;
   3370   case NVPTXISD::TexUnified1DArrayS32S32:
   3371     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
   3372     break;
   3373   case NVPTXISD::TexUnified1DArrayS32Float:
   3374     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
   3375     break;
   3376   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
   3377     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
   3378     break;
   3379   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
   3380     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
   3381     break;
   3382   case NVPTXISD::TexUnified1DArrayU32S32:
   3383     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
   3384     break;
   3385   case NVPTXISD::TexUnified1DArrayU32Float:
   3386     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
   3387     break;
   3388   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
   3389     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
   3390     break;
   3391   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
   3392     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
   3393     break;
   3394   case NVPTXISD::TexUnified2DFloatS32:
   3395     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
   3396     break;
   3397   case NVPTXISD::TexUnified2DFloatFloat:
   3398     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
   3399     break;
   3400   case NVPTXISD::TexUnified2DFloatFloatLevel:
   3401     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
   3402     break;
   3403   case NVPTXISD::TexUnified2DFloatFloatGrad:
   3404     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
   3405     break;
   3406   case NVPTXISD::TexUnified2DS32S32:
   3407     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
   3408     break;
   3409   case NVPTXISD::TexUnified2DS32Float:
   3410     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
   3411     break;
   3412   case NVPTXISD::TexUnified2DS32FloatLevel:
   3413     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
   3414     break;
   3415   case NVPTXISD::TexUnified2DS32FloatGrad:
   3416     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
   3417     break;
   3418   case NVPTXISD::TexUnified2DU32S32:
   3419     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
   3420     break;
   3421   case NVPTXISD::TexUnified2DU32Float:
   3422     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
   3423     break;
   3424   case NVPTXISD::TexUnified2DU32FloatLevel:
   3425     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
   3426     break;
   3427   case NVPTXISD::TexUnified2DU32FloatGrad:
   3428     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
   3429     break;
   3430   case NVPTXISD::TexUnified2DArrayFloatS32:
   3431     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
   3432     break;
   3433   case NVPTXISD::TexUnified2DArrayFloatFloat:
   3434     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
   3435     break;
   3436   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
   3437     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
   3438     break;
   3439   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
   3440     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
   3441     break;
   3442   case NVPTXISD::TexUnified2DArrayS32S32:
   3443     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
   3444     break;
   3445   case NVPTXISD::TexUnified2DArrayS32Float:
   3446     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
   3447     break;
   3448   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
   3449     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
   3450     break;
   3451   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
   3452     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
   3453     break;
   3454   case NVPTXISD::TexUnified2DArrayU32S32:
   3455     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
   3456     break;
   3457   case NVPTXISD::TexUnified2DArrayU32Float:
   3458     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
   3459     break;
   3460   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
   3461     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
   3462     break;
   3463   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
   3464     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
   3465     break;
   3466   case NVPTXISD::TexUnified3DFloatS32:
   3467     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
   3468     break;
   3469   case NVPTXISD::TexUnified3DFloatFloat:
   3470     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
   3471     break;
   3472   case NVPTXISD::TexUnified3DFloatFloatLevel:
   3473     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
   3474     break;
   3475   case NVPTXISD::TexUnified3DFloatFloatGrad:
   3476     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
   3477     break;
   3478   case NVPTXISD::TexUnified3DS32S32:
   3479     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
   3480     break;
   3481   case NVPTXISD::TexUnified3DS32Float:
   3482     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
   3483     break;
   3484   case NVPTXISD::TexUnified3DS32FloatLevel:
   3485     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
   3486     break;
   3487   case NVPTXISD::TexUnified3DS32FloatGrad:
   3488     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
   3489     break;
   3490   case NVPTXISD::TexUnified3DU32S32:
   3491     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
   3492     break;
   3493   case NVPTXISD::TexUnified3DU32Float:
   3494     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
   3495     break;
   3496   case NVPTXISD::TexUnified3DU32FloatLevel:
   3497     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
   3498     break;
   3499   case NVPTXISD::TexUnified3DU32FloatGrad:
   3500     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
   3501     break;
   3502   case NVPTXISD::TexUnifiedCubeFloatFloat:
   3503     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
   3504     break;
   3505   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
   3506     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
   3507     break;
   3508   case NVPTXISD::TexUnifiedCubeS32Float:
   3509     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
   3510     break;
   3511   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
   3512     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
   3513     break;
   3514   case NVPTXISD::TexUnifiedCubeU32Float:
   3515     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
   3516     break;
   3517   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
   3518     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
   3519     break;
   3520   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
   3521     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
   3522     break;
   3523   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
   3524     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
   3525     break;
   3526   case NVPTXISD::TexUnifiedCubeArrayS32Float:
   3527     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
   3528     break;
   3529   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
   3530     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
   3531     break;
   3532   case NVPTXISD::TexUnifiedCubeArrayU32Float:
   3533     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
   3534     break;
   3535   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
   3536     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
   3537     break;
   3538   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
   3539     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
   3540     break;
   3541   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
   3542     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
   3543     break;
   3544   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
   3545     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
   3546     break;
   3547   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
   3548     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
   3549     break;
   3550   case NVPTXISD::Tld4UnifiedR2DS64Float:
   3551     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
   3552     break;
   3553   case NVPTXISD::Tld4UnifiedG2DS64Float:
   3554     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
   3555     break;
   3556   case NVPTXISD::Tld4UnifiedB2DS64Float:
   3557     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
   3558     break;
   3559   case NVPTXISD::Tld4UnifiedA2DS64Float:
   3560     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
   3561     break;
   3562   case NVPTXISD::Tld4UnifiedR2DU64Float:
   3563     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
   3564     break;
   3565   case NVPTXISD::Tld4UnifiedG2DU64Float:
   3566     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
   3567     break;
   3568   case NVPTXISD::Tld4UnifiedB2DU64Float:
   3569     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
   3570     break;
   3571   case NVPTXISD::Tld4UnifiedA2DU64Float:
   3572     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
   3573     break;
   3574   }
   3575 
   3576   // Copy every operand after index 0; Chain is appended last.
   3577   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
   3578     Ops.push_back(N->getOperand(i));
   3579   }
   3580 
   3581   Ops.push_back(Chain);
   3582   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   3583   return Ret;
   3584 }
   3585 
   3586 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
   3587   SDValue Chain = N->getOperand(0);
   3588   SDValue TexHandle = N->getOperand(1);
   3589   SDNode *Ret = nullptr;
   3590   unsigned Opc = 0;
   3591   SmallVector<SDValue, 8> Ops;
   3592   switch (N->getOpcode()) {
   3593   default: return nullptr;
   3594   case NVPTXISD::Suld1DI8Clamp:
   3595     Opc = NVPTX::SULD_1D_I8_CLAMP;
   3596     Ops.push_back(TexHandle);
   3597     Ops.push_back(N->getOperand(2));
   3598     Ops.push_back(Chain);
   3599     break;
   3600   case NVPTXISD::Suld1DI16Clamp:
   3601     Opc = NVPTX::SULD_1D_I16_CLAMP;
   3602     Ops.push_back(TexHandle);
   3603     Ops.push_back(N->getOperand(2));
   3604     Ops.push_back(Chain);
   3605     break;
   3606   case NVPTXISD::Suld1DI32Clamp:
   3607     Opc = NVPTX::SULD_1D_I32_CLAMP;
   3608     Ops.push_back(TexHandle);
   3609     Ops.push_back(N->getOperand(2));
   3610     Ops.push_back(Chain);
   3611     break;
   3612   case NVPTXISD::Suld1DI64Clamp:
   3613     Opc = NVPTX::SULD_1D_I64_CLAMP;
   3614     Ops.push_back(TexHandle);
   3615     Ops.push_back(N->getOperand(2));
   3616     Ops.push_back(Chain);
   3617     break;
   3618   case NVPTXISD::Suld1DV2I8Clamp:
   3619     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
   3620     Ops.push_back(TexHandle);
   3621     Ops.push_back(N->getOperand(2));
   3622     Ops.push_back(Chain);
   3623     break;
   3624   case NVPTXISD::Suld1DV2I16Clamp:
   3625     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
   3626     Ops.push_back(TexHandle);
   3627     Ops.push_back(N->getOperand(2));
   3628     Ops.push_back(Chain);
   3629     break;
   3630   case NVPTXISD::Suld1DV2I32Clamp:
   3631     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
   3632     Ops.push_back(TexHandle);
   3633     Ops.push_back(N->getOperand(2));
   3634     Ops.push_back(Chain);
   3635     break;
   3636   case NVPTXISD::Suld1DV2I64Clamp:
   3637     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
   3638     Ops.push_back(TexHandle);
   3639     Ops.push_back(N->getOperand(2));
   3640     Ops.push_back(Chain);
   3641     break;
   3642   case NVPTXISD::Suld1DV4I8Clamp:
   3643     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
   3644     Ops.push_back(TexHandle);
   3645     Ops.push_back(N->getOperand(2));
   3646     Ops.push_back(Chain);
   3647     break;
   3648   case NVPTXISD::Suld1DV4I16Clamp:
   3649     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
   3650     Ops.push_back(TexHandle);
   3651     Ops.push_back(N->getOperand(2));
   3652     Ops.push_back(Chain);
   3653     break;
   3654   case NVPTXISD::Suld1DV4I32Clamp:
   3655     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
   3656     Ops.push_back(TexHandle);
   3657     Ops.push_back(N->getOperand(2));
   3658     Ops.push_back(Chain);
   3659     break;
   3660   case NVPTXISD::Suld1DArrayI8Clamp:
   3661     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
   3662     Ops.push_back(TexHandle);
   3663     Ops.push_back(N->getOperand(2));
   3664     Ops.push_back(N->getOperand(3));
   3665     Ops.push_back(Chain);
   3666     break;
   3667   case NVPTXISD::Suld1DArrayI16Clamp:
   3668     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
   3669     Ops.push_back(TexHandle);
   3670     Ops.push_back(N->getOperand(2));
   3671     Ops.push_back(N->getOperand(3));
   3672     Ops.push_back(Chain);
   3673     break;
   3674   case NVPTXISD::Suld1DArrayI32Clamp:
   3675     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
   3676     Ops.push_back(TexHandle);
   3677     Ops.push_back(N->getOperand(2));
   3678     Ops.push_back(N->getOperand(3));
   3679     Ops.push_back(Chain);
   3680     break;
   3681   case NVPTXISD::Suld1DArrayI64Clamp:
   3682     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
   3683     Ops.push_back(TexHandle);
   3684     Ops.push_back(N->getOperand(2));
   3685     Ops.push_back(N->getOperand(3));
   3686     Ops.push_back(Chain);
   3687     break;
   3688   case NVPTXISD::Suld1DArrayV2I8Clamp:
   3689     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
   3690     Ops.push_back(TexHandle);
   3691     Ops.push_back(N->getOperand(2));
   3692     Ops.push_back(N->getOperand(3));
   3693     Ops.push_back(Chain);
   3694     break;
   3695   case NVPTXISD::Suld1DArrayV2I16Clamp:
   3696     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
   3697     Ops.push_back(TexHandle);
   3698     Ops.push_back(N->getOperand(2));
   3699     Ops.push_back(N->getOperand(3));
   3700     Ops.push_back(Chain);
   3701     break;
   3702   case NVPTXISD::Suld1DArrayV2I32Clamp:
   3703     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
   3704     Ops.push_back(TexHandle);
   3705     Ops.push_back(N->getOperand(2));
   3706     Ops.push_back(N->getOperand(3));
   3707     Ops.push_back(Chain);
   3708     break;
   3709   case NVPTXISD::Suld1DArrayV2I64Clamp:
   3710     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
   3711     Ops.push_back(TexHandle);
   3712     Ops.push_back(N->getOperand(2));
   3713     Ops.push_back(N->getOperand(3));
   3714     Ops.push_back(Chain);
   3715     break;
   3716   case NVPTXISD::Suld1DArrayV4I8Clamp:
   3717     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
   3718     Ops.push_back(TexHandle);
   3719     Ops.push_back(N->getOperand(2));
   3720     Ops.push_back(N->getOperand(3));
   3721     Ops.push_back(Chain);
   3722     break;
   3723   case NVPTXISD::Suld1DArrayV4I16Clamp:
   3724     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
   3725     Ops.push_back(TexHandle);
   3726     Ops.push_back(N->getOperand(2));
   3727     Ops.push_back(N->getOperand(3));
   3728     Ops.push_back(Chain);
   3729     break;
   3730   case NVPTXISD::Suld1DArrayV4I32Clamp:
   3731     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
   3732     Ops.push_back(TexHandle);
   3733     Ops.push_back(N->getOperand(2));
   3734     Ops.push_back(N->getOperand(3));
   3735     Ops.push_back(Chain);
   3736     break;
   3737   case NVPTXISD::Suld2DI8Clamp:
   3738     Opc = NVPTX::SULD_2D_I8_CLAMP;
   3739     Ops.push_back(TexHandle);
   3740     Ops.push_back(N->getOperand(2));
   3741     Ops.push_back(N->getOperand(3));
   3742     Ops.push_back(Chain);
   3743     break;
   3744   case NVPTXISD::Suld2DI16Clamp:
   3745     Opc = NVPTX::SULD_2D_I16_CLAMP;
   3746     Ops.push_back(TexHandle);
   3747     Ops.push_back(N->getOperand(2));
   3748     Ops.push_back(N->getOperand(3));
   3749     Ops.push_back(Chain);
   3750     break;
   3751   case NVPTXISD::Suld2DI32Clamp:
   3752     Opc = NVPTX::SULD_2D_I32_CLAMP;
   3753     Ops.push_back(TexHandle);
   3754     Ops.push_back(N->getOperand(2));
   3755     Ops.push_back(N->getOperand(3));
   3756     Ops.push_back(Chain);
   3757     break;
   3758   case NVPTXISD::Suld2DI64Clamp:
   3759     Opc = NVPTX::SULD_2D_I64_CLAMP;
   3760     Ops.push_back(TexHandle);
   3761     Ops.push_back(N->getOperand(2));
   3762     Ops.push_back(N->getOperand(3));
   3763     Ops.push_back(Chain);
   3764     break;
   3765   case NVPTXISD::Suld2DV2I8Clamp:
   3766     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
   3767     Ops.push_back(TexHandle);
   3768     Ops.push_back(N->getOperand(2));
   3769     Ops.push_back(N->getOperand(3));
   3770     Ops.push_back(Chain);
   3771     break;
   3772   case NVPTXISD::Suld2DV2I16Clamp:
   3773     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
   3774     Ops.push_back(TexHandle);
   3775     Ops.push_back(N->getOperand(2));
   3776     Ops.push_back(N->getOperand(3));
   3777     Ops.push_back(Chain);
   3778     break;
   3779   case NVPTXISD::Suld2DV2I32Clamp:
   3780     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
   3781     Ops.push_back(TexHandle);
   3782     Ops.push_back(N->getOperand(2));
   3783     Ops.push_back(N->getOperand(3));
   3784     Ops.push_back(Chain);
   3785     break;
   3786   case NVPTXISD::Suld2DV2I64Clamp:
   3787     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
   3788     Ops.push_back(TexHandle);
   3789     Ops.push_back(N->getOperand(2));
   3790     Ops.push_back(N->getOperand(3));
   3791     Ops.push_back(Chain);
   3792     break;
   3793   case NVPTXISD::Suld2DV4I8Clamp:
   3794     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
   3795     Ops.push_back(TexHandle);
   3796     Ops.push_back(N->getOperand(2));
   3797     Ops.push_back(N->getOperand(3));
   3798     Ops.push_back(Chain);
   3799     break;
   3800   case NVPTXISD::Suld2DV4I16Clamp:
   3801     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
   3802     Ops.push_back(TexHandle);
   3803     Ops.push_back(N->getOperand(2));
   3804     Ops.push_back(N->getOperand(3));
   3805     Ops.push_back(Chain);
   3806     break;
   3807   case NVPTXISD::Suld2DV4I32Clamp:
   3808     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
   3809     Ops.push_back(TexHandle);
   3810     Ops.push_back(N->getOperand(2));
   3811     Ops.push_back(N->getOperand(3));
   3812     Ops.push_back(Chain);
   3813     break;
   3814   case NVPTXISD::Suld2DArrayI8Clamp:
   3815     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
   3816     Ops.push_back(TexHandle);
   3817     Ops.push_back(N->getOperand(2));
   3818     Ops.push_back(N->getOperand(3));
   3819     Ops.push_back(N->getOperand(4));
   3820     Ops.push_back(Chain);
   3821     break;
   3822   case NVPTXISD::Suld2DArrayI16Clamp:
   3823     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
   3824     Ops.push_back(TexHandle);
   3825     Ops.push_back(N->getOperand(2));
   3826     Ops.push_back(N->getOperand(3));
   3827     Ops.push_back(N->getOperand(4));
   3828     Ops.push_back(Chain);
   3829     break;
   3830   case NVPTXISD::Suld2DArrayI32Clamp:
   3831     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
   3832     Ops.push_back(TexHandle);
   3833     Ops.push_back(N->getOperand(2));
   3834     Ops.push_back(N->getOperand(3));
   3835     Ops.push_back(N->getOperand(4));
   3836     Ops.push_back(Chain);
   3837     break;
   3838   case NVPTXISD::Suld2DArrayI64Clamp:
   3839     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
   3840     Ops.push_back(TexHandle);
   3841     Ops.push_back(N->getOperand(2));
   3842     Ops.push_back(N->getOperand(3));
   3843     Ops.push_back(N->getOperand(4));
   3844     Ops.push_back(Chain);
   3845     break;
   3846   case NVPTXISD::Suld2DArrayV2I8Clamp:
   3847     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
   3848     Ops.push_back(TexHandle);
   3849     Ops.push_back(N->getOperand(2));
   3850     Ops.push_back(N->getOperand(3));
   3851     Ops.push_back(N->getOperand(4));
   3852     Ops.push_back(Chain);
   3853     break;
   3854   case NVPTXISD::Suld2DArrayV2I16Clamp:
   3855     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
   3856     Ops.push_back(TexHandle);
   3857     Ops.push_back(N->getOperand(2));
   3858     Ops.push_back(N->getOperand(3));
   3859     Ops.push_back(N->getOperand(4));
   3860     Ops.push_back(Chain);
   3861     break;
   3862   case NVPTXISD::Suld2DArrayV2I32Clamp:
   3863     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
   3864     Ops.push_back(TexHandle);
   3865     Ops.push_back(N->getOperand(2));
   3866     Ops.push_back(N->getOperand(3));
   3867     Ops.push_back(N->getOperand(4));
   3868     Ops.push_back(Chain);
   3869     break;
   3870   case NVPTXISD::Suld2DArrayV2I64Clamp:
   3871     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
   3872     Ops.push_back(TexHandle);
   3873     Ops.push_back(N->getOperand(2));
   3874     Ops.push_back(N->getOperand(3));
   3875     Ops.push_back(N->getOperand(4));
   3876     Ops.push_back(Chain);
   3877     break;
   3878   case NVPTXISD::Suld2DArrayV4I8Clamp:
   3879     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
   3880     Ops.push_back(TexHandle);
   3881     Ops.push_back(N->getOperand(2));
   3882     Ops.push_back(N->getOperand(3));
   3883     Ops.push_back(N->getOperand(4));
   3884     Ops.push_back(Chain);
   3885     break;
   3886   case NVPTXISD::Suld2DArrayV4I16Clamp:
   3887     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
   3888     Ops.push_back(TexHandle);
   3889     Ops.push_back(N->getOperand(2));
   3890     Ops.push_back(N->getOperand(3));
   3891     Ops.push_back(N->getOperand(4));
   3892     Ops.push_back(Chain);
   3893     break;
   3894   case NVPTXISD::Suld2DArrayV4I32Clamp:
   3895     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
   3896     Ops.push_back(TexHandle);
   3897     Ops.push_back(N->getOperand(2));
   3898     Ops.push_back(N->getOperand(3));
   3899     Ops.push_back(N->getOperand(4));
   3900     Ops.push_back(Chain);
   3901     break;
   3902   case NVPTXISD::Suld3DI8Clamp:
   3903     Opc = NVPTX::SULD_3D_I8_CLAMP;
   3904     Ops.push_back(TexHandle);
   3905     Ops.push_back(N->getOperand(2));
   3906     Ops.push_back(N->getOperand(3));
   3907     Ops.push_back(N->getOperand(4));
   3908     Ops.push_back(Chain);
   3909     break;
   3910   case NVPTXISD::Suld3DI16Clamp:
   3911     Opc = NVPTX::SULD_3D_I16_CLAMP;
   3912     Ops.push_back(TexHandle);
   3913     Ops.push_back(N->getOperand(2));
   3914     Ops.push_back(N->getOperand(3));
   3915     Ops.push_back(N->getOperand(4));
   3916     Ops.push_back(Chain);
   3917     break;
   3918   case NVPTXISD::Suld3DI32Clamp:
   3919     Opc = NVPTX::SULD_3D_I32_CLAMP;
   3920     Ops.push_back(TexHandle);
   3921     Ops.push_back(N->getOperand(2));
   3922     Ops.push_back(N->getOperand(3));
   3923     Ops.push_back(N->getOperand(4));
   3924     Ops.push_back(Chain);
   3925     break;
   3926   case NVPTXISD::Suld3DI64Clamp:
   3927     Opc = NVPTX::SULD_3D_I64_CLAMP;
   3928     Ops.push_back(TexHandle);
   3929     Ops.push_back(N->getOperand(2));
   3930     Ops.push_back(N->getOperand(3));
   3931     Ops.push_back(N->getOperand(4));
   3932     Ops.push_back(Chain);
   3933     break;
   3934   case NVPTXISD::Suld3DV2I8Clamp:
   3935     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
   3936     Ops.push_back(TexHandle);
   3937     Ops.push_back(N->getOperand(2));
   3938     Ops.push_back(N->getOperand(3));
   3939     Ops.push_back(N->getOperand(4));
   3940     Ops.push_back(Chain);
   3941     break;
   3942   case NVPTXISD::Suld3DV2I16Clamp:
   3943     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
   3944     Ops.push_back(TexHandle);
   3945     Ops.push_back(N->getOperand(2));
   3946     Ops.push_back(N->getOperand(3));
   3947     Ops.push_back(N->getOperand(4));
   3948     Ops.push_back(Chain);
   3949     break;
   3950   case NVPTXISD::Suld3DV2I32Clamp:
   3951     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
   3952     Ops.push_back(TexHandle);
   3953     Ops.push_back(N->getOperand(2));
   3954     Ops.push_back(N->getOperand(3));
   3955     Ops.push_back(N->getOperand(4));
   3956     Ops.push_back(Chain);
   3957     break;
   3958   case NVPTXISD::Suld3DV2I64Clamp:
   3959     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
   3960     Ops.push_back(TexHandle);
   3961     Ops.push_back(N->getOperand(2));
   3962     Ops.push_back(N->getOperand(3));
   3963     Ops.push_back(N->getOperand(4));
   3964     Ops.push_back(Chain);
   3965     break;
   3966   case NVPTXISD::Suld3DV4I8Clamp:
   3967     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
   3968     Ops.push_back(TexHandle);
   3969     Ops.push_back(N->getOperand(2));
   3970     Ops.push_back(N->getOperand(3));
   3971     Ops.push_back(N->getOperand(4));
   3972     Ops.push_back(Chain);
   3973     break;
   3974   case NVPTXISD::Suld3DV4I16Clamp:
   3975     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
   3976     Ops.push_back(TexHandle);
   3977     Ops.push_back(N->getOperand(2));
   3978     Ops.push_back(N->getOperand(3));
   3979     Ops.push_back(N->getOperand(4));
   3980     Ops.push_back(Chain);
   3981     break;
   3982   case NVPTXISD::Suld3DV4I32Clamp:
   3983     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
   3984     Ops.push_back(TexHandle);
   3985     Ops.push_back(N->getOperand(2));
   3986     Ops.push_back(N->getOperand(3));
   3987     Ops.push_back(N->getOperand(4));
   3988     Ops.push_back(Chain);
   3989     break;
   3990   case NVPTXISD::Suld1DI8Trap:
   3991     Opc = NVPTX::SULD_1D_I8_TRAP;
   3992     Ops.push_back(TexHandle);
   3993     Ops.push_back(N->getOperand(2));
   3994     Ops.push_back(Chain);
   3995     break;
   3996   case NVPTXISD::Suld1DI16Trap:
   3997     Opc = NVPTX::SULD_1D_I16_TRAP;
   3998     Ops.push_back(TexHandle);
   3999     Ops.push_back(N->getOperand(2));
   4000     Ops.push_back(Chain);
   4001     break;
   4002   case NVPTXISD::Suld1DI32Trap:
   4003     Opc = NVPTX::SULD_1D_I32_TRAP;
   4004     Ops.push_back(TexHandle);
   4005     Ops.push_back(N->getOperand(2));
   4006     Ops.push_back(Chain);
   4007     break;
   4008   case NVPTXISD::Suld1DI64Trap:
   4009     Opc = NVPTX::SULD_1D_I64_TRAP;
   4010     Ops.push_back(TexHandle);
   4011     Ops.push_back(N->getOperand(2));
   4012     Ops.push_back(Chain);
   4013     break;
   4014   case NVPTXISD::Suld1DV2I8Trap:
   4015     Opc = NVPTX::SULD_1D_V2I8_TRAP;
   4016     Ops.push_back(TexHandle);
   4017     Ops.push_back(N->getOperand(2));
   4018     Ops.push_back(Chain);
   4019     break;
   4020   case NVPTXISD::Suld1DV2I16Trap:
   4021     Opc = NVPTX::SULD_1D_V2I16_TRAP;
   4022     Ops.push_back(TexHandle);
   4023     Ops.push_back(N->getOperand(2));
   4024     Ops.push_back(Chain);
   4025     break;
   4026   case NVPTXISD::Suld1DV2I32Trap:
   4027     Opc = NVPTX::SULD_1D_V2I32_TRAP;
   4028     Ops.push_back(TexHandle);
   4029     Ops.push_back(N->getOperand(2));
   4030     Ops.push_back(Chain);
   4031     break;
   4032   case NVPTXISD::Suld1DV2I64Trap:
   4033     Opc = NVPTX::SULD_1D_V2I64_TRAP;
   4034     Ops.push_back(TexHandle);
   4035     Ops.push_back(N->getOperand(2));
   4036     Ops.push_back(Chain);
   4037     break;
   4038   case NVPTXISD::Suld1DV4I8Trap:
   4039     Opc = NVPTX::SULD_1D_V4I8_TRAP;
   4040     Ops.push_back(TexHandle);
   4041     Ops.push_back(N->getOperand(2));
   4042     Ops.push_back(Chain);
   4043     break;
   4044   case NVPTXISD::Suld1DV4I16Trap:
   4045     Opc = NVPTX::SULD_1D_V4I16_TRAP;
   4046     Ops.push_back(TexHandle);
   4047     Ops.push_back(N->getOperand(2));
   4048     Ops.push_back(Chain);
   4049     break;
   4050   case NVPTXISD::Suld1DV4I32Trap:
   4051     Opc = NVPTX::SULD_1D_V4I32_TRAP;
   4052     Ops.push_back(TexHandle);
   4053     Ops.push_back(N->getOperand(2));
   4054     Ops.push_back(Chain);
   4055     break;
   4056   case NVPTXISD::Suld1DArrayI8Trap:
   4057     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
   4058     Ops.push_back(TexHandle);
   4059     Ops.push_back(N->getOperand(2));
   4060     Ops.push_back(N->getOperand(3));
   4061     Ops.push_back(Chain);
   4062     break;
   4063   case NVPTXISD::Suld1DArrayI16Trap:
   4064     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
   4065     Ops.push_back(TexHandle);
   4066     Ops.push_back(N->getOperand(2));
   4067     Ops.push_back(N->getOperand(3));
   4068     Ops.push_back(Chain);
   4069     break;
   4070   case NVPTXISD::Suld1DArrayI32Trap:
   4071     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
   4072     Ops.push_back(TexHandle);
   4073     Ops.push_back(N->getOperand(2));
   4074     Ops.push_back(N->getOperand(3));
   4075     Ops.push_back(Chain);
   4076     break;
   4077   case NVPTXISD::Suld1DArrayI64Trap:
   4078     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
   4079     Ops.push_back(TexHandle);
   4080     Ops.push_back(N->getOperand(2));
   4081     Ops.push_back(N->getOperand(3));
   4082     Ops.push_back(Chain);
   4083     break;
   4084   case NVPTXISD::Suld1DArrayV2I8Trap:
   4085     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
   4086     Ops.push_back(TexHandle);
   4087     Ops.push_back(N->getOperand(2));
   4088     Ops.push_back(N->getOperand(3));
   4089     Ops.push_back(Chain);
   4090     break;
   4091   case NVPTXISD::Suld1DArrayV2I16Trap:
   4092     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
   4093     Ops.push_back(TexHandle);
   4094     Ops.push_back(N->getOperand(2));
   4095     Ops.push_back(N->getOperand(3));
   4096     Ops.push_back(Chain);
   4097     break;
   4098   case NVPTXISD::Suld1DArrayV2I32Trap:
   4099     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
   4100     Ops.push_back(TexHandle);
   4101     Ops.push_back(N->getOperand(2));
   4102     Ops.push_back(N->getOperand(3));
   4103     Ops.push_back(Chain);
   4104     break;
   4105   case NVPTXISD::Suld1DArrayV2I64Trap:
   4106     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
   4107     Ops.push_back(TexHandle);
   4108     Ops.push_back(N->getOperand(2));
   4109     Ops.push_back(N->getOperand(3));
   4110     Ops.push_back(Chain);
   4111     break;
   4112   case NVPTXISD::Suld1DArrayV4I8Trap:
   4113     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
   4114     Ops.push_back(TexHandle);
   4115     Ops.push_back(N->getOperand(2));
   4116     Ops.push_back(N->getOperand(3));
   4117     Ops.push_back(Chain);
   4118     break;
   4119   case NVPTXISD::Suld1DArrayV4I16Trap:
   4120     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
   4121     Ops.push_back(TexHandle);
   4122     Ops.push_back(N->getOperand(2));
   4123     Ops.push_back(N->getOperand(3));
   4124     Ops.push_back(Chain);
   4125     break;
   4126   case NVPTXISD::Suld1DArrayV4I32Trap:
   4127     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
   4128     Ops.push_back(TexHandle);
   4129     Ops.push_back(N->getOperand(2));
   4130     Ops.push_back(N->getOperand(3));
   4131     Ops.push_back(Chain);
   4132     break;
   4133   case NVPTXISD::Suld2DI8Trap:
   4134     Opc = NVPTX::SULD_2D_I8_TRAP;
   4135     Ops.push_back(TexHandle);
   4136     Ops.push_back(N->getOperand(2));
   4137     Ops.push_back(N->getOperand(3));
   4138     Ops.push_back(Chain);
   4139     break;
   4140   case NVPTXISD::Suld2DI16Trap:
   4141     Opc = NVPTX::SULD_2D_I16_TRAP;
   4142     Ops.push_back(TexHandle);
   4143     Ops.push_back(N->getOperand(2));
   4144     Ops.push_back(N->getOperand(3));
   4145     Ops.push_back(Chain);
   4146     break;
   4147   case NVPTXISD::Suld2DI32Trap:
   4148     Opc = NVPTX::SULD_2D_I32_TRAP;
   4149     Ops.push_back(TexHandle);
   4150     Ops.push_back(N->getOperand(2));
   4151     Ops.push_back(N->getOperand(3));
   4152     Ops.push_back(Chain);
   4153     break;
   4154   case NVPTXISD::Suld2DI64Trap:
   4155     Opc = NVPTX::SULD_2D_I64_TRAP;
   4156     Ops.push_back(TexHandle);
   4157     Ops.push_back(N->getOperand(2));
   4158     Ops.push_back(N->getOperand(3));
   4159     Ops.push_back(Chain);
   4160     break;
   4161   case NVPTXISD::Suld2DV2I8Trap:
   4162     Opc = NVPTX::SULD_2D_V2I8_TRAP;
   4163     Ops.push_back(TexHandle);
   4164     Ops.push_back(N->getOperand(2));
   4165     Ops.push_back(N->getOperand(3));
   4166     Ops.push_back(Chain);
   4167     break;
   4168   case NVPTXISD::Suld2DV2I16Trap:
   4169     Opc = NVPTX::SULD_2D_V2I16_TRAP;
   4170     Ops.push_back(TexHandle);
   4171     Ops.push_back(N->getOperand(2));
   4172     Ops.push_back(N->getOperand(3));
   4173     Ops.push_back(Chain);
   4174     break;
   4175   case NVPTXISD::Suld2DV2I32Trap:
   4176     Opc = NVPTX::SULD_2D_V2I32_TRAP;
   4177     Ops.push_back(TexHandle);
   4178     Ops.push_back(N->getOperand(2));
   4179     Ops.push_back(N->getOperand(3));
   4180     Ops.push_back(Chain);
   4181     break;
   4182   case NVPTXISD::Suld2DV2I64Trap:
   4183     Opc = NVPTX::SULD_2D_V2I64_TRAP;
   4184     Ops.push_back(TexHandle);
   4185     Ops.push_back(N->getOperand(2));
   4186     Ops.push_back(N->getOperand(3));
   4187     Ops.push_back(Chain);
   4188     break;
   4189   case NVPTXISD::Suld2DV4I8Trap:
   4190     Opc = NVPTX::SULD_2D_V4I8_TRAP;
   4191     Ops.push_back(TexHandle);
   4192     Ops.push_back(N->getOperand(2));
   4193     Ops.push_back(N->getOperand(3));
   4194     Ops.push_back(Chain);
   4195     break;
   4196   case NVPTXISD::Suld2DV4I16Trap:
   4197     Opc = NVPTX::SULD_2D_V4I16_TRAP;
   4198     Ops.push_back(TexHandle);
   4199     Ops.push_back(N->getOperand(2));
   4200     Ops.push_back(N->getOperand(3));
   4201     Ops.push_back(Chain);
   4202     break;
   4203   case NVPTXISD::Suld2DV4I32Trap:
   4204     Opc = NVPTX::SULD_2D_V4I32_TRAP;
   4205     Ops.push_back(TexHandle);
   4206     Ops.push_back(N->getOperand(2));
   4207     Ops.push_back(N->getOperand(3));
   4208     Ops.push_back(Chain);
   4209     break;
   4210   case NVPTXISD::Suld2DArrayI8Trap:
   4211     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
   4212     Ops.push_back(TexHandle);
   4213     Ops.push_back(N->getOperand(2));
   4214     Ops.push_back(N->getOperand(3));
   4215     Ops.push_back(N->getOperand(4));
   4216     Ops.push_back(Chain);
   4217     break;
   4218   case NVPTXISD::Suld2DArrayI16Trap:
   4219     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
   4220     Ops.push_back(TexHandle);
   4221     Ops.push_back(N->getOperand(2));
   4222     Ops.push_back(N->getOperand(3));
   4223     Ops.push_back(N->getOperand(4));
   4224     Ops.push_back(Chain);
   4225     break;
   4226   case NVPTXISD::Suld2DArrayI32Trap:
   4227     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
   4228     Ops.push_back(TexHandle);
   4229     Ops.push_back(N->getOperand(2));
   4230     Ops.push_back(N->getOperand(3));
   4231     Ops.push_back(N->getOperand(4));
   4232     Ops.push_back(Chain);
   4233     break;
   4234   case NVPTXISD::Suld2DArrayI64Trap:
   4235     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
   4236     Ops.push_back(TexHandle);
   4237     Ops.push_back(N->getOperand(2));
   4238     Ops.push_back(N->getOperand(3));
   4239     Ops.push_back(N->getOperand(4));
   4240     Ops.push_back(Chain);
   4241     break;
   4242   case NVPTXISD::Suld2DArrayV2I8Trap:
   4243     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
   4244     Ops.push_back(TexHandle);
   4245     Ops.push_back(N->getOperand(2));
   4246     Ops.push_back(N->getOperand(3));
   4247     Ops.push_back(N->getOperand(4));
   4248     Ops.push_back(Chain);
   4249     break;
   4250   case NVPTXISD::Suld2DArrayV2I16Trap:
   4251     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
   4252     Ops.push_back(TexHandle);
   4253     Ops.push_back(N->getOperand(2));
   4254     Ops.push_back(N->getOperand(3));
   4255     Ops.push_back(N->getOperand(4));
   4256     Ops.push_back(Chain);
   4257     break;
   4258   case NVPTXISD::Suld2DArrayV2I32Trap:
   4259     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
   4260     Ops.push_back(TexHandle);
   4261     Ops.push_back(N->getOperand(2));
   4262     Ops.push_back(N->getOperand(3));
   4263     Ops.push_back(N->getOperand(4));
   4264     Ops.push_back(Chain);
   4265     break;
   4266   case NVPTXISD::Suld2DArrayV2I64Trap:
   4267     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
   4268     Ops.push_back(TexHandle);
   4269     Ops.push_back(N->getOperand(2));
   4270     Ops.push_back(N->getOperand(3));
   4271     Ops.push_back(N->getOperand(4));
   4272     Ops.push_back(Chain);
   4273     break;
   4274   case NVPTXISD::Suld2DArrayV4I8Trap:
   4275     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
   4276     Ops.push_back(TexHandle);
   4277     Ops.push_back(N->getOperand(2));
   4278     Ops.push_back(N->getOperand(3));
   4279     Ops.push_back(N->getOperand(4));
   4280     Ops.push_back(Chain);
   4281     break;
   4282   case NVPTXISD::Suld2DArrayV4I16Trap:
   4283     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
   4284     Ops.push_back(TexHandle);
   4285     Ops.push_back(N->getOperand(2));
   4286     Ops.push_back(N->getOperand(3));
   4287     Ops.push_back(N->getOperand(4));
   4288     Ops.push_back(Chain);
   4289     break;
   4290   case NVPTXISD::Suld2DArrayV4I32Trap:
   4291     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
   4292     Ops.push_back(TexHandle);
   4293     Ops.push_back(N->getOperand(2));
   4294     Ops.push_back(N->getOperand(3));
   4295     Ops.push_back(N->getOperand(4));
   4296     Ops.push_back(Chain);
   4297     break;
   4298   case NVPTXISD::Suld3DI8Trap:
   4299     Opc = NVPTX::SULD_3D_I8_TRAP;
   4300     Ops.push_back(TexHandle);
   4301     Ops.push_back(N->getOperand(2));
   4302     Ops.push_back(N->getOperand(3));
   4303     Ops.push_back(N->getOperand(4));
   4304     Ops.push_back(Chain);
   4305     break;
   4306   case NVPTXISD::Suld3DI16Trap:
   4307     Opc = NVPTX::SULD_3D_I16_TRAP;
   4308     Ops.push_back(TexHandle);
   4309     Ops.push_back(N->getOperand(2));
   4310     Ops.push_back(N->getOperand(3));
   4311     Ops.push_back(N->getOperand(4));
   4312     Ops.push_back(Chain);
   4313     break;
   4314   case NVPTXISD::Suld3DI32Trap:
   4315     Opc = NVPTX::SULD_3D_I32_TRAP;
   4316     Ops.push_back(TexHandle);
   4317     Ops.push_back(N->getOperand(2));
   4318     Ops.push_back(N->getOperand(3));
   4319     Ops.push_back(N->getOperand(4));
   4320     Ops.push_back(Chain);
   4321     break;
   4322   case NVPTXISD::Suld3DI64Trap:
   4323     Opc = NVPTX::SULD_3D_I64_TRAP;
   4324     Ops.push_back(TexHandle);
   4325     Ops.push_back(N->getOperand(2));
   4326     Ops.push_back(N->getOperand(3));
   4327     Ops.push_back(N->getOperand(4));
   4328     Ops.push_back(Chain);
   4329     break;
   4330   case NVPTXISD::Suld3DV2I8Trap:
   4331     Opc = NVPTX::SULD_3D_V2I8_TRAP;
   4332     Ops.push_back(TexHandle);
   4333     Ops.push_back(N->getOperand(2));
   4334     Ops.push_back(N->getOperand(3));
   4335     Ops.push_back(N->getOperand(4));
   4336     Ops.push_back(Chain);
   4337     break;
   4338   case NVPTXISD::Suld3DV2I16Trap:
   4339     Opc = NVPTX::SULD_3D_V2I16_TRAP;
   4340     Ops.push_back(TexHandle);
   4341     Ops.push_back(N->getOperand(2));
   4342     Ops.push_back(N->getOperand(3));
   4343     Ops.push_back(N->getOperand(4));
   4344     Ops.push_back(Chain);
   4345     break;
   4346   case NVPTXISD::Suld3DV2I32Trap:
   4347     Opc = NVPTX::SULD_3D_V2I32_TRAP;
   4348     Ops.push_back(TexHandle);
   4349     Ops.push_back(N->getOperand(2));
   4350     Ops.push_back(N->getOperand(3));
   4351     Ops.push_back(N->getOperand(4));
   4352     Ops.push_back(Chain);
   4353     break;
   4354   case NVPTXISD::Suld3DV2I64Trap:
   4355     Opc = NVPTX::SULD_3D_V2I64_TRAP;
   4356     Ops.push_back(TexHandle);
   4357     Ops.push_back(N->getOperand(2));
   4358     Ops.push_back(N->getOperand(3));
   4359     Ops.push_back(N->getOperand(4));
   4360     Ops.push_back(Chain);
   4361     break;
   4362   case NVPTXISD::Suld3DV4I8Trap:
   4363     Opc = NVPTX::SULD_3D_V4I8_TRAP;
   4364     Ops.push_back(TexHandle);
   4365     Ops.push_back(N->getOperand(2));
   4366     Ops.push_back(N->getOperand(3));
   4367     Ops.push_back(N->getOperand(4));
   4368     Ops.push_back(Chain);
   4369     break;
   4370   case NVPTXISD::Suld3DV4I16Trap:
   4371     Opc = NVPTX::SULD_3D_V4I16_TRAP;
   4372     Ops.push_back(TexHandle);
   4373     Ops.push_back(N->getOperand(2));
   4374     Ops.push_back(N->getOperand(3));
   4375     Ops.push_back(N->getOperand(4));
   4376     Ops.push_back(Chain);
   4377     break;
   4378   case NVPTXISD::Suld3DV4I32Trap:
   4379     Opc = NVPTX::SULD_3D_V4I32_TRAP;
   4380     Ops.push_back(TexHandle);
   4381     Ops.push_back(N->getOperand(2));
   4382     Ops.push_back(N->getOperand(3));
   4383     Ops.push_back(N->getOperand(4));
   4384     Ops.push_back(Chain);
   4385     break;
   4386   case NVPTXISD::Suld1DI8Zero:
   4387     Opc = NVPTX::SULD_1D_I8_ZERO;
   4388     Ops.push_back(TexHandle);
   4389     Ops.push_back(N->getOperand(2));
   4390     Ops.push_back(Chain);
   4391     break;
   4392   case NVPTXISD::Suld1DI16Zero:
   4393     Opc = NVPTX::SULD_1D_I16_ZERO;
   4394     Ops.push_back(TexHandle);
   4395     Ops.push_back(N->getOperand(2));
   4396     Ops.push_back(Chain);
   4397     break;
   4398   case NVPTXISD::Suld1DI32Zero:
   4399     Opc = NVPTX::SULD_1D_I32_ZERO;
   4400     Ops.push_back(TexHandle);
   4401     Ops.push_back(N->getOperand(2));
   4402     Ops.push_back(Chain);
   4403     break;
   4404   case NVPTXISD::Suld1DI64Zero:
   4405     Opc = NVPTX::SULD_1D_I64_ZERO;
   4406     Ops.push_back(TexHandle);
   4407     Ops.push_back(N->getOperand(2));
   4408     Ops.push_back(Chain);
   4409     break;
   4410   case NVPTXISD::Suld1DV2I8Zero:
   4411     Opc = NVPTX::SULD_1D_V2I8_ZERO;
   4412     Ops.push_back(TexHandle);
   4413     Ops.push_back(N->getOperand(2));
   4414     Ops.push_back(Chain);
   4415     break;
   4416   case NVPTXISD::Suld1DV2I16Zero:
   4417     Opc = NVPTX::SULD_1D_V2I16_ZERO;
   4418     Ops.push_back(TexHandle);
   4419     Ops.push_back(N->getOperand(2));
   4420     Ops.push_back(Chain);
   4421     break;
   4422   case NVPTXISD::Suld1DV2I32Zero:
   4423     Opc = NVPTX::SULD_1D_V2I32_ZERO;
   4424     Ops.push_back(TexHandle);
   4425     Ops.push_back(N->getOperand(2));
   4426     Ops.push_back(Chain);
   4427     break;
   4428   case NVPTXISD::Suld1DV2I64Zero:
   4429     Opc = NVPTX::SULD_1D_V2I64_ZERO;
   4430     Ops.push_back(TexHandle);
   4431     Ops.push_back(N->getOperand(2));
   4432     Ops.push_back(Chain);
   4433     break;
   4434   case NVPTXISD::Suld1DV4I8Zero:
   4435     Opc = NVPTX::SULD_1D_V4I8_ZERO;
   4436     Ops.push_back(TexHandle);
   4437     Ops.push_back(N->getOperand(2));
   4438     Ops.push_back(Chain);
   4439     break;
   4440   case NVPTXISD::Suld1DV4I16Zero:
   4441     Opc = NVPTX::SULD_1D_V4I16_ZERO;
   4442     Ops.push_back(TexHandle);
   4443     Ops.push_back(N->getOperand(2));
   4444     Ops.push_back(Chain);
   4445     break;
   4446   case NVPTXISD::Suld1DV4I32Zero:
   4447     Opc = NVPTX::SULD_1D_V4I32_ZERO;
   4448     Ops.push_back(TexHandle);
   4449     Ops.push_back(N->getOperand(2));
   4450     Ops.push_back(Chain);
   4451     break;
   4452   case NVPTXISD::Suld1DArrayI8Zero:
   4453     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
   4454     Ops.push_back(TexHandle);
   4455     Ops.push_back(N->getOperand(2));
   4456     Ops.push_back(N->getOperand(3));
   4457     Ops.push_back(Chain);
   4458     break;
   4459   case NVPTXISD::Suld1DArrayI16Zero:
   4460     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
   4461     Ops.push_back(TexHandle);
   4462     Ops.push_back(N->getOperand(2));
   4463     Ops.push_back(N->getOperand(3));
   4464     Ops.push_back(Chain);
   4465     break;
   4466   case NVPTXISD::Suld1DArrayI32Zero:
   4467     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
   4468     Ops.push_back(TexHandle);
   4469     Ops.push_back(N->getOperand(2));
   4470     Ops.push_back(N->getOperand(3));
   4471     Ops.push_back(Chain);
   4472     break;
   4473   case NVPTXISD::Suld1DArrayI64Zero:
   4474     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
   4475     Ops.push_back(TexHandle);
   4476     Ops.push_back(N->getOperand(2));
   4477     Ops.push_back(N->getOperand(3));
   4478     Ops.push_back(Chain);
   4479     break;
   4480   case NVPTXISD::Suld1DArrayV2I8Zero:
   4481     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
   4482     Ops.push_back(TexHandle);
   4483     Ops.push_back(N->getOperand(2));
   4484     Ops.push_back(N->getOperand(3));
   4485     Ops.push_back(Chain);
   4486     break;
   4487   case NVPTXISD::Suld1DArrayV2I16Zero:
   4488     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
   4489     Ops.push_back(TexHandle);
   4490     Ops.push_back(N->getOperand(2));
   4491     Ops.push_back(N->getOperand(3));
   4492     Ops.push_back(Chain);
   4493     break;
   4494   case NVPTXISD::Suld1DArrayV2I32Zero:
   4495     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
   4496     Ops.push_back(TexHandle);
   4497     Ops.push_back(N->getOperand(2));
   4498     Ops.push_back(N->getOperand(3));
   4499     Ops.push_back(Chain);
   4500     break;
   4501   case NVPTXISD::Suld1DArrayV2I64Zero:
   4502     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
   4503     Ops.push_back(TexHandle);
   4504     Ops.push_back(N->getOperand(2));
   4505     Ops.push_back(N->getOperand(3));
   4506     Ops.push_back(Chain);
   4507     break;
   4508   case NVPTXISD::Suld1DArrayV4I8Zero:
   4509     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
   4510     Ops.push_back(TexHandle);
   4511     Ops.push_back(N->getOperand(2));
   4512     Ops.push_back(N->getOperand(3));
   4513     Ops.push_back(Chain);
   4514     break;
   4515   case NVPTXISD::Suld1DArrayV4I16Zero:
   4516     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
   4517     Ops.push_back(TexHandle);
   4518     Ops.push_back(N->getOperand(2));
   4519     Ops.push_back(N->getOperand(3));
   4520     Ops.push_back(Chain);
   4521     break;
   4522   case NVPTXISD::Suld1DArrayV4I32Zero:
   4523     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
   4524     Ops.push_back(TexHandle);
   4525     Ops.push_back(N->getOperand(2));
   4526     Ops.push_back(N->getOperand(3));
   4527     Ops.push_back(Chain);
   4528     break;
   4529   case NVPTXISD::Suld2DI8Zero:
   4530     Opc = NVPTX::SULD_2D_I8_ZERO;
   4531     Ops.push_back(TexHandle);
   4532     Ops.push_back(N->getOperand(2));
   4533     Ops.push_back(N->getOperand(3));
   4534     Ops.push_back(Chain);
   4535     break;
   4536   case NVPTXISD::Suld2DI16Zero:
   4537     Opc = NVPTX::SULD_2D_I16_ZERO;
   4538     Ops.push_back(TexHandle);
   4539     Ops.push_back(N->getOperand(2));
   4540     Ops.push_back(N->getOperand(3));
   4541     Ops.push_back(Chain);
   4542     break;
   4543   case NVPTXISD::Suld2DI32Zero:
   4544     Opc = NVPTX::SULD_2D_I32_ZERO;
   4545     Ops.push_back(TexHandle);
   4546     Ops.push_back(N->getOperand(2));
   4547     Ops.push_back(N->getOperand(3));
   4548     Ops.push_back(Chain);
   4549     break;
   4550   case NVPTXISD::Suld2DI64Zero:
   4551     Opc = NVPTX::SULD_2D_I64_ZERO;
   4552     Ops.push_back(TexHandle);
   4553     Ops.push_back(N->getOperand(2));
   4554     Ops.push_back(N->getOperand(3));
   4555     Ops.push_back(Chain);
   4556     break;
   4557   case NVPTXISD::Suld2DV2I8Zero:
   4558     Opc = NVPTX::SULD_2D_V2I8_ZERO;
   4559     Ops.push_back(TexHandle);
   4560     Ops.push_back(N->getOperand(2));
   4561     Ops.push_back(N->getOperand(3));
   4562     Ops.push_back(Chain);
   4563     break;
   4564   case NVPTXISD::Suld2DV2I16Zero:
   4565     Opc = NVPTX::SULD_2D_V2I16_ZERO;
   4566     Ops.push_back(TexHandle);
   4567     Ops.push_back(N->getOperand(2));
   4568     Ops.push_back(N->getOperand(3));
   4569     Ops.push_back(Chain);
   4570     break;
   4571   case NVPTXISD::Suld2DV2I32Zero:
   4572     Opc = NVPTX::SULD_2D_V2I32_ZERO;
   4573     Ops.push_back(TexHandle);
   4574     Ops.push_back(N->getOperand(2));
   4575     Ops.push_back(N->getOperand(3));
   4576     Ops.push_back(Chain);
   4577     break;
   4578   case NVPTXISD::Suld2DV2I64Zero:
   4579     Opc = NVPTX::SULD_2D_V2I64_ZERO;
   4580     Ops.push_back(TexHandle);
   4581     Ops.push_back(N->getOperand(2));
   4582     Ops.push_back(N->getOperand(3));
   4583     Ops.push_back(Chain);
   4584     break;
   4585   case NVPTXISD::Suld2DV4I8Zero:
   4586     Opc = NVPTX::SULD_2D_V4I8_ZERO;
   4587     Ops.push_back(TexHandle);
   4588     Ops.push_back(N->getOperand(2));
   4589     Ops.push_back(N->getOperand(3));
   4590     Ops.push_back(Chain);
   4591     break;
   4592   case NVPTXISD::Suld2DV4I16Zero:
   4593     Opc = NVPTX::SULD_2D_V4I16_ZERO;
   4594     Ops.push_back(TexHandle);
   4595     Ops.push_back(N->getOperand(2));
   4596     Ops.push_back(N->getOperand(3));
   4597     Ops.push_back(Chain);
   4598     break;
   4599   case NVPTXISD::Suld2DV4I32Zero:
   4600     Opc = NVPTX::SULD_2D_V4I32_ZERO;
   4601     Ops.push_back(TexHandle);
   4602     Ops.push_back(N->getOperand(2));
   4603     Ops.push_back(N->getOperand(3));
   4604     Ops.push_back(Chain);
   4605     break;
   4606   case NVPTXISD::Suld2DArrayI8Zero:
   4607     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
   4608     Ops.push_back(TexHandle);
   4609     Ops.push_back(N->getOperand(2));
   4610     Ops.push_back(N->getOperand(3));
   4611     Ops.push_back(N->getOperand(4));
   4612     Ops.push_back(Chain);
   4613     break;
   4614   case NVPTXISD::Suld2DArrayI16Zero:
   4615     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
   4616     Ops.push_back(TexHandle);
   4617     Ops.push_back(N->getOperand(2));
   4618     Ops.push_back(N->getOperand(3));
   4619     Ops.push_back(N->getOperand(4));
   4620     Ops.push_back(Chain);
   4621     break;
   4622   case NVPTXISD::Suld2DArrayI32Zero:
   4623     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
   4624     Ops.push_back(TexHandle);
   4625     Ops.push_back(N->getOperand(2));
   4626     Ops.push_back(N->getOperand(3));
   4627     Ops.push_back(N->getOperand(4));
   4628     Ops.push_back(Chain);
   4629     break;
   4630   case NVPTXISD::Suld2DArrayI64Zero:
   4631     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
   4632     Ops.push_back(TexHandle);
   4633     Ops.push_back(N->getOperand(2));
   4634     Ops.push_back(N->getOperand(3));
   4635     Ops.push_back(N->getOperand(4));
   4636     Ops.push_back(Chain);
   4637     break;
   4638   case NVPTXISD::Suld2DArrayV2I8Zero:
   4639     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
   4640     Ops.push_back(TexHandle);
   4641     Ops.push_back(N->getOperand(2));
   4642     Ops.push_back(N->getOperand(3));
   4643     Ops.push_back(N->getOperand(4));
   4644     Ops.push_back(Chain);
   4645     break;
   4646   case NVPTXISD::Suld2DArrayV2I16Zero:
   4647     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
   4648     Ops.push_back(TexHandle);
   4649     Ops.push_back(N->getOperand(2));
   4650     Ops.push_back(N->getOperand(3));
   4651     Ops.push_back(N->getOperand(4));
   4652     Ops.push_back(Chain);
   4653     break;
   4654   case NVPTXISD::Suld2DArrayV2I32Zero:
   4655     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
   4656     Ops.push_back(TexHandle);
   4657     Ops.push_back(N->getOperand(2));
   4658     Ops.push_back(N->getOperand(3));
   4659     Ops.push_back(N->getOperand(4));
   4660     Ops.push_back(Chain);
   4661     break;
   4662   case NVPTXISD::Suld2DArrayV2I64Zero:
   4663     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
   4664     Ops.push_back(TexHandle);
   4665     Ops.push_back(N->getOperand(2));
   4666     Ops.push_back(N->getOperand(3));
   4667     Ops.push_back(N->getOperand(4));
   4668     Ops.push_back(Chain);
   4669     break;
   4670   case NVPTXISD::Suld2DArrayV4I8Zero:
   4671     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
   4672     Ops.push_back(TexHandle);
   4673     Ops.push_back(N->getOperand(2));
   4674     Ops.push_back(N->getOperand(3));
   4675     Ops.push_back(N->getOperand(4));
   4676     Ops.push_back(Chain);
   4677     break;
   4678   case NVPTXISD::Suld2DArrayV4I16Zero:
   4679     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
   4680     Ops.push_back(TexHandle);
   4681     Ops.push_back(N->getOperand(2));
   4682     Ops.push_back(N->getOperand(3));
   4683     Ops.push_back(N->getOperand(4));
   4684     Ops.push_back(Chain);
   4685     break;
   4686   case NVPTXISD::Suld2DArrayV4I32Zero:
   4687     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
   4688     Ops.push_back(TexHandle);
   4689     Ops.push_back(N->getOperand(2));
   4690     Ops.push_back(N->getOperand(3));
   4691     Ops.push_back(N->getOperand(4));
   4692     Ops.push_back(Chain);
   4693     break;
   4694   case NVPTXISD::Suld3DI8Zero:
   4695     Opc = NVPTX::SULD_3D_I8_ZERO;
   4696     Ops.push_back(TexHandle);
   4697     Ops.push_back(N->getOperand(2));
   4698     Ops.push_back(N->getOperand(3));
   4699     Ops.push_back(N->getOperand(4));
   4700     Ops.push_back(Chain);
   4701     break;
   4702   case NVPTXISD::Suld3DI16Zero:
   4703     Opc = NVPTX::SULD_3D_I16_ZERO;
   4704     Ops.push_back(TexHandle);
   4705     Ops.push_back(N->getOperand(2));
   4706     Ops.push_back(N->getOperand(3));
   4707     Ops.push_back(N->getOperand(4));
   4708     Ops.push_back(Chain);
   4709     break;
   4710   case NVPTXISD::Suld3DI32Zero:
   4711     Opc = NVPTX::SULD_3D_I32_ZERO;
   4712     Ops.push_back(TexHandle);
   4713     Ops.push_back(N->getOperand(2));
   4714     Ops.push_back(N->getOperand(3));
   4715     Ops.push_back(N->getOperand(4));
   4716     Ops.push_back(Chain);
   4717     break;
   4718   case NVPTXISD::Suld3DI64Zero:
   4719     Opc = NVPTX::SULD_3D_I64_ZERO;
   4720     Ops.push_back(TexHandle);
   4721     Ops.push_back(N->getOperand(2));
   4722     Ops.push_back(N->getOperand(3));
   4723     Ops.push_back(N->getOperand(4));
   4724     Ops.push_back(Chain);
   4725     break;
   4726   case NVPTXISD::Suld3DV2I8Zero:
   4727     Opc = NVPTX::SULD_3D_V2I8_ZERO;
   4728     Ops.push_back(TexHandle);
   4729     Ops.push_back(N->getOperand(2));
   4730     Ops.push_back(N->getOperand(3));
   4731     Ops.push_back(N->getOperand(4));
   4732     Ops.push_back(Chain);
   4733     break;
   4734   case NVPTXISD::Suld3DV2I16Zero:
   4735     Opc = NVPTX::SULD_3D_V2I16_ZERO;
   4736     Ops.push_back(TexHandle);
   4737     Ops.push_back(N->getOperand(2));
   4738     Ops.push_back(N->getOperand(3));
   4739     Ops.push_back(N->getOperand(4));
   4740     Ops.push_back(Chain);
   4741     break;
   4742   case NVPTXISD::Suld3DV2I32Zero:
   4743     Opc = NVPTX::SULD_3D_V2I32_ZERO;
   4744     Ops.push_back(TexHandle);
   4745     Ops.push_back(N->getOperand(2));
   4746     Ops.push_back(N->getOperand(3));
   4747     Ops.push_back(N->getOperand(4));
   4748     Ops.push_back(Chain);
   4749     break;
   4750   case NVPTXISD::Suld3DV2I64Zero:
   4751     Opc = NVPTX::SULD_3D_V2I64_ZERO;
   4752     Ops.push_back(TexHandle);
   4753     Ops.push_back(N->getOperand(2));
   4754     Ops.push_back(N->getOperand(3));
   4755     Ops.push_back(N->getOperand(4));
   4756     Ops.push_back(Chain);
   4757     break;
   4758   case NVPTXISD::Suld3DV4I8Zero:
   4759     Opc = NVPTX::SULD_3D_V4I8_ZERO;
   4760     Ops.push_back(TexHandle);
   4761     Ops.push_back(N->getOperand(2));
   4762     Ops.push_back(N->getOperand(3));
   4763     Ops.push_back(N->getOperand(4));
   4764     Ops.push_back(Chain);
   4765     break;
   4766   case NVPTXISD::Suld3DV4I16Zero:
   4767     Opc = NVPTX::SULD_3D_V4I16_ZERO;
   4768     Ops.push_back(TexHandle);
   4769     Ops.push_back(N->getOperand(2));
   4770     Ops.push_back(N->getOperand(3));
   4771     Ops.push_back(N->getOperand(4));
   4772     Ops.push_back(Chain);
   4773     break;
   4774   case NVPTXISD::Suld3DV4I32Zero:
   4775     Opc = NVPTX::SULD_3D_V4I32_ZERO;
   4776     Ops.push_back(TexHandle);
   4777     Ops.push_back(N->getOperand(2));
   4778     Ops.push_back(N->getOperand(3));
   4779     Ops.push_back(N->getOperand(4));
   4780     Ops.push_back(Chain);
   4781     break;
   4782   }
   4783   Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
   4784   return Ret;
   4785 }
   4786 
   4787 
   4788 /// SelectBFE - Look for instruction sequences that can be made more efficient
   4789 /// by using the 'bfe' (bit-field extract) PTX instruction.
        ///
        /// Three shapes are recognized, all requiring constant masks and constant
        /// shift amounts:
        ///   (and (srl|sra val, start), lowmask)
        ///   (srl|sra (and val, mask), shift)
        ///   (srl|sra (shl val, inner), outer)   with outer >= inner
        /// Only i32 and i64 values are handled.
        ///
        /// \param N  The node to inspect; any opcode other than ISD::AND, ISD::SRL
        ///           or ISD::SRA is rejected.
        /// \returns The BFE machine node built for \p N, or NULL if no BFE was
        ///          formed.
   4790 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
   4791   SDLoc DL(N);
   4792   SDValue LHS = N->getOperand(0);
   4793   SDValue RHS = N->getOperand(1);
   4794   SDValue Len;
   4795   SDValue Start;
   4796   SDValue Val;
   4797   bool IsSigned = false;
   4798
   4799   if (N->getOpcode() == ISD::AND) {
   4800     // Canonicalize the operands
   4801     // We want 'and %val, %mask'
   4802     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
   4803       std::swap(LHS, RHS);
   4804     }
   4805
   4806     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
   4807     if (!Mask) {
   4808       // We need a constant mask on the RHS of the AND
   4809       return NULL;
   4810     }
   4811
   4812     // Extract the mask bits
   4813     uint64_t MaskVal = Mask->getZExtValue();
   4814     if (!isMask_64(MaskVal)) {
   4815       // We *could* handle shifted masks here, but doing so would require an
   4816       // 'and' operation to fix up the low-order bits so we would trade
   4817       // shr+and for bfe+and, which has the same throughput
   4818       return NULL;
   4819     }
   4820
   4821     // How many bits are in our mask?
   4822     uint64_t NumBits = countTrailingOnes(MaskVal);
   4823     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
   4824
   4825     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
   4826       // We have a 'srl/and' pair, extract the effective start bit and length
   4827       Val = LHS.getNode()->getOperand(0);
   4828       Start = LHS.getNode()->getOperand(1);
   4829       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
   4830       if (StartConst) {
   4831         uint64_t StartVal = StartConst->getZExtValue();
                // NOTE(review): the width is taken from the shift-amount operand,
                // exactly as before; the width of Val looks like the intended
                // bound -- confirm before changing, since that would alter which
                // extracts get selected.
                uint64_t TotalBits = Start.getValueType().getSizeInBits();
                if (StartVal > TotalBits) {
                  // Guard the unsigned subtraction below: it would wrap around to
                  // a huge value and defeat the NumBits > GoodBits rejection.
                  return NULL;
                }
   4832         // How many "good" bits do we have left?  "good" is defined here as bits
   4833         // that exist in the original value, not shifted in.
   4834         uint64_t GoodBits = TotalBits - StartVal;
   4835         if (NumBits > GoodBits) {
   4836           // Do not handle the case where bits have been shifted in. In theory
   4837           // we could handle this, but the cost is likely higher than just
   4838           // emitting the srl/and pair.
   4839           return NULL;
   4840         }
   4841         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
   4842       } else {
   4843         // Do not handle the case where the shift amount (can be zero if no srl
   4844         // was found) is not constant. We could handle this case, but it would
   4845         // require run-time logic that would be more expensive than just
   4846         // emitting the srl/and pair.
   4847         return NULL;
   4848       }
   4849     } else {
   4850       // Do not handle the case where the LHS of the and is not a shift. While
   4851       // it would be trivial to handle this case, it would just transform
   4852       // 'and' -> 'bfe', but 'and' has higher-throughput.
   4853       return NULL;
   4854     }
   4855   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
   4856     if (LHS->getOpcode() == ISD::AND) {
   4857       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
   4858       if (!ShiftCnst) {
   4859         // Shift amount must be constant
   4860         return NULL;
   4861       }
   4862
   4863       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
   4864
   4865       SDValue AndLHS = LHS->getOperand(0);
   4866       SDValue AndRHS = LHS->getOperand(1);
   4867
   4868       // Canonicalize the AND to have the mask on the RHS
   4869       if (isa<ConstantSDNode>(AndLHS)) {
   4870         std::swap(AndLHS, AndRHS);
   4871       }
   4872
   4873       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
   4874       if (!MaskCnst) {
   4875         // Mask must be constant
   4876         return NULL;
   4877       }
   4878
   4879       uint64_t MaskVal = MaskCnst->getZExtValue();
   4880       uint64_t NumZeros;
   4881       uint64_t FieldEnd; // One past the highest set bit of the mask
   4882       if (isMask_64(MaskVal)) {
   4883         NumZeros = 0;
   4884         // A low mask: the field kept by the AND starts at bit 0 and ends
   4885         // after its trailing ones
   4886         FieldEnd = countTrailingOnes(MaskVal);
   4887       } else if (isShiftedMask_64(MaskVal)) {
   4888         NumZeros = countTrailingZeros(MaskVal);
   4889         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
   4890         // A shifted mask: the field kept by the AND starts after the
   4891         // trailing zeros and ends after the run of set bits that follows
   4892         // them
   4893         FieldEnd = NumZeros + NumOnes;
   4894       } else {
   4895         // This is not a mask we can handle
   4896         return NULL;
   4897       }
   4898
   4899       if (ShiftAmt < NumZeros) {
   4900         // Handling this case would require extra logic that would make this
   4901         // transformation non-profitable
   4902         return NULL;
   4903       }
   4904
              if (ShiftAmt > FieldEnd) {
                // The shift discards more than the whole field; the unsigned
                // subtraction below would wrap around and yield a bogus length
                return NULL;
              }

              if (N->getOpcode() == ISD::SRA &&
                  FieldEnd == AndLHS.getValueType().getSizeInBits()) {
                // The mask keeps the sign bit, so the arithmetic shift replicates
                // it into the upper bits of the result; the unsigned bfe formed
                // for this pattern would fill them with zeros instead
                return NULL;
              }

              // The number of bits in the result bitfield is whatever is left of
              // the masked field once the low ShiftAmt bits are shifted off
              uint64_t NumBits = FieldEnd - ShiftAmt;

   4905       Val = AndLHS;
   4906       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
   4907       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
   4908     } else if (LHS->getOpcode() == ISD::SHL) {
   4909       // Here, we have a pattern like:
   4910       //
   4911       // (sra (shl val, NN), MM)
   4912       // or
   4913       // (srl (shl val, NN), MM)
   4914       //
   4915       // If MM >= NN, we can efficiently optimize this with bfe
   4916       Val = LHS->getOperand(0);
   4917
   4918       SDValue ShlRHS = LHS->getOperand(1);
   4919       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
   4920       if (!ShlCnst) {
   4921         // Shift amount must be constant
   4922         return NULL;
   4923       }
   4924       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
   4925
   4926       SDValue ShrRHS = RHS;
   4927       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
   4928       if (!ShrCnst) {
   4929         // Shift amount must be constant
   4930         return NULL;
   4931       }
   4932       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
   4933
   4934       // To avoid extra codegen and be profitable, we need Outer >= Inner
   4935       if (OuterShiftAmt < InnerShiftAmt) {
   4936         return NULL;
   4937       }
   4938
   4939       // If the outer shift is more than the type size, we have no bitfield to
   4940       // extract (since we also check that the inner shift is <= the outer shift
   4941       // then this also implies that the inner shift is < the type size)
   4942       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
   4943         return NULL;
   4944       }
   4945
   4946       Start =
   4947         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
   4948       Len =
   4949         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
   4950                                   OuterShiftAmt, DL, MVT::i32);
   4951
   4952       if (N->getOpcode() == ISD::SRA) {
   4953         // If we have a arithmetic right shift, we need to use the signed bfe
   4954         // variant
   4955         IsSigned = true;
   4956       }
   4957     } else {
   4958       // No can do...
   4959       return NULL;
   4960     }
   4961   } else {
   4962     // No can do...
   4963     return NULL;
   4964   }
   4965
   4966
   4967   unsigned Opc;
   4968   // Pick the opcode from the value type. The signed variants are only used
   4969   // for the (sra (shl ...)) form; every other form uses the unsigned ones.
   4970   if (Val.getValueType() == MVT::i32) {
   4971     if (IsSigned) {
   4972       Opc = NVPTX::BFE_S32rii;
   4973     } else {
   4974       Opc = NVPTX::BFE_U32rii;
   4975     }
   4976   } else if (Val.getValueType() == MVT::i64) {
   4977     if (IsSigned) {
   4978       Opc = NVPTX::BFE_S64rii;
   4979     } else {
   4980       Opc = NVPTX::BFE_U64rii;
   4981     }
   4982   } else {
   4983     // We cannot handle this type
   4984     return NULL;
   4985   }
   4986
   4987   // Operand order is: source value, start bit, field length
   4988   SDValue Ops[] = {
   4989     Val, Start, Len
   4990   };
   4991   return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
   4992 }
   4993 
   4994 // SelectDirectAddr - Try to match N as a direct address: a target global
   4995 // address or target external symbol, bare or behind a recognized wrapper.
   4996 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   4997   switch (N.getOpcode()) {
   4998   case ISD::TargetGlobalAddress:
   4999   case ISD::TargetExternalSymbol:
   5000     Address = N; // The node is itself the address.
   5001     return true;
   5002   case NVPTXISD::Wrapper:
   5003     Address = N.getOperand(0); // Report the wrapped operand.
   5004     return true;
   5005   case ISD::INTRINSIC_WO_CHAIN: {
   5006     // Look through gen-to-param of a MoveParam; operand 0 is the intrinsic ID.
   5007     unsigned IntrID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
   5008     return IntrID == Intrinsic::nvvm_ptr_gen_to_param &&
   5009            N.getOperand(1).getOpcode() == NVPTXISD::MoveParam &&
   5010            SelectDirectAddr(N.getOperand(1).getOperand(0), Address);
   5011   }
   5012   }
   5013   return false;
   5014 }
   5015 
   5016 // symbol+offset: match (add <direct address>, <constant>).
   5017 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
   5018     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   5019   // Anything other than an ADD cannot be split into symbol and offset.
   5020   if (Addr.getOpcode() != ISD::ADD)
   5021     return false;
   5022   // The right-hand operand has to be a constant ...
   5023   ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
   5024   // ... and the left-hand one a direct address, which is stored into Base.
   5025   if (!OffsetNode || !SelectDirectAddr(Addr.getOperand(0), Base))
   5026     return false;
   5027   const uint64_t Imm = OffsetNode->getZExtValue();
   5028   Offset = CurDAG->getTargetConstant(Imm, SDLoc(OpNode), mvt);
   5029   return true;
   5030 }
   5031 
   5032 // symbol+offset
   5033 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
   5034                                      SDValue &Base, SDValue &Offset) {
   5035   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
   5036 }
   5037 
   5038 // symbol+offset
   5039 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
   5040                                        SDValue &Base, SDValue &Offset) {
   5041   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
   5042 }
   5043 
   5044 // register+offset
   5045 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
   5046     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   5047   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
   5048     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   5049     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
   5050     return true;
   5051   }
   5052   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
   5053       Addr.getOpcode() == ISD::TargetGlobalAddress)
   5054     return false; // direct calls.
   5055 
   5056   if (Addr.getOpcode() == ISD::ADD) {
   5057     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
   5058       return false;
   5059     }
   5060     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   5061       if (FrameIndexSDNode *FIN =
   5062               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
   5063         // Constant offset from frame ref.
   5064         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   5065       else
   5066         Base = Addr.getOperand(0);
   5067       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
   5068                                          mvt);
   5069       return true;
   5070     }
   5071   }
   5072   return false;
   5073 }
   5074 
   5075 // register+offset
   5076 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
   5077                                      SDValue &Base, SDValue &Offset) {
   5078   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
   5079 }
   5080 
   5081 // register+offset
   5082 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   5083                                        SDValue &Base, SDValue &Offset) {
   5084   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
   5085 }
   5086 
   5087 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   5088                                                  unsigned int spN) const {
   5089   const Value *Src = nullptr;
   5090   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
   5091     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
   5092       return true;
   5093     Src = mN->getMemOperand()->getValue();
   5094   }
   5095   if (!Src)
   5096     return false;
   5097   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
   5098     return (PT->getAddressSpace() == spN);
   5099   return false;
   5100 }
   5101 
   5102 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   5103 /// inline asm expressions.
   5104 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   5105     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
   5106   SDValue Op0, Op1;
   5107   switch (ConstraintID) {
   5108   default:
   5109     return true;
   5110   case InlineAsm::Constraint_m: // memory
   5111     if (SelectDirectAddr(Op, Op0)) {
   5112       OutOps.push_back(Op0);
   5113       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
   5114       return false;
   5115     }
   5116     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
   5117       OutOps.push_back(Op0);
   5118       OutOps.push_back(Op1);
   5119       return false;
   5120     }
   5121     break;
   5122   }
   5123   return true;
   5124 }
   5125