Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "NVPTXISelDAGToDAG.h"
     15 #include "NVPTXUtilities.h"
     16 #include "llvm/Analysis/ValueTracking.h"
     17 #include "llvm/IR/GlobalValue.h"
     18 #include "llvm/IR/Instructions.h"
     19 #include "llvm/Support/CommandLine.h"
     20 #include "llvm/Support/Debug.h"
     21 #include "llvm/Support/ErrorHandling.h"
     22 #include "llvm/Support/raw_ostream.h"
     23 #include "llvm/Target/TargetIntrinsicInfo.h"
     24 
     25 using namespace llvm;
     26 
     27 #define DEBUG_TYPE "nvptx-isel"
     28 
     29 static cl::opt<int> UsePrecDivF32(
     30     "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
     31     cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     32              " IEEE Compliant F32 div.rnd if available."),
     33     cl::init(2));
     34 
     35 static cl::opt<bool>
     36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
     37           cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
     38           cl::init(true));
     39 
     40 static cl::opt<bool>
     41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
     42            cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
     43            cl::init(false));
     44 
     45 
     46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     47 /// NVPTX-specific DAG, ready for instruction scheduling.
     48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     49                                        llvm::CodeGenOpt::Level OptLevel) {
     50   return new NVPTXDAGToDAGISel(TM, OptLevel);
     51 }
     52 
     53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     54                                      CodeGenOpt::Level OptLevel)
     55     : SelectionDAGISel(tm, OptLevel), TM(tm) {
     56   doMulWide = (OptLevel > 0);
     57 }
     58 
     59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
     60     Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
     61     return SelectionDAGISel::runOnMachineFunction(MF);
     62 }
     63 
     64 int NVPTXDAGToDAGISel::getDivF32Level() const {
     65   if (UsePrecDivF32.getNumOccurrences() > 0) {
     66     // If nvptx-prec-div32=N is used on the command-line, always honor it
     67     return UsePrecDivF32;
     68   } else {
     69     // Otherwise, use div.approx if fast math is enabled
     70     if (TM.Options.UnsafeFPMath)
     71       return 0;
     72     else
     73       return 2;
     74   }
     75 }
     76 
     77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
     78   if (UsePrecSqrtF32.getNumOccurrences() > 0) {
     79     // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
     80     return UsePrecSqrtF32;
     81   } else {
     82     // Otherwise, use sqrt.approx if fast math is enabled
     83     return !TM.Options.UnsafeFPMath;
     84   }
     85 }
     86 
     87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
     88   if (FtzEnabled.getNumOccurrences() > 0) {
     89     // If nvptx-f32ftz is used on the command-line, always honor it
     90     return FtzEnabled;
     91   } else {
     92     const Function *F = MF->getFunction();
     93     // Otherwise, check for an nvptx-f32ftz attribute on the function
     94     if (F->hasFnAttribute("nvptx-f32ftz"))
     95       return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
     96     else
     97       return false;
     98   }
     99 }
    100 
    101 bool NVPTXDAGToDAGISel::allowFMA() const {
    102   const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
    103   return TL->allowFMA(*MF, OptLevel);
    104 }
    105 
    106 /// Select - Select instructions not customized! Used for
    107 /// expanded, promoted and normal instructions.
    108 void NVPTXDAGToDAGISel::Select(SDNode *N) {
    109 
    110   if (N->isMachineOpcode()) {
    111     N->setNodeId(-1);
    112     return; // Already selected.
    113   }
    114 
    115   switch (N->getOpcode()) {
    116   case ISD::LOAD:
    117     if (tryLoad(N))
    118       return;
    119     break;
    120   case ISD::STORE:
    121     if (tryStore(N))
    122       return;
    123     break;
    124   case NVPTXISD::LoadV2:
    125   case NVPTXISD::LoadV4:
    126     if (tryLoadVector(N))
    127       return;
    128     break;
    129   case NVPTXISD::LDGV2:
    130   case NVPTXISD::LDGV4:
    131   case NVPTXISD::LDUV2:
    132   case NVPTXISD::LDUV4:
    133     if (tryLDGLDU(N))
    134       return;
    135     break;
    136   case NVPTXISD::StoreV2:
    137   case NVPTXISD::StoreV4:
    138     if (tryStoreVector(N))
    139       return;
    140     break;
    141   case NVPTXISD::LoadParam:
    142   case NVPTXISD::LoadParamV2:
    143   case NVPTXISD::LoadParamV4:
    144     if (tryLoadParam(N))
    145       return;
    146     break;
    147   case NVPTXISD::StoreRetval:
    148   case NVPTXISD::StoreRetvalV2:
    149   case NVPTXISD::StoreRetvalV4:
    150     if (tryStoreRetval(N))
    151       return;
    152     break;
    153   case NVPTXISD::StoreParam:
    154   case NVPTXISD::StoreParamV2:
    155   case NVPTXISD::StoreParamV4:
    156   case NVPTXISD::StoreParamS32:
    157   case NVPTXISD::StoreParamU32:
    158     if (tryStoreParam(N))
    159       return;
    160     break;
    161   case ISD::INTRINSIC_WO_CHAIN:
    162     if (tryIntrinsicNoChain(N))
    163       return;
    164     break;
    165   case ISD::INTRINSIC_W_CHAIN:
    166     if (tryIntrinsicChain(N))
    167       return;
    168     break;
    169   case NVPTXISD::Tex1DFloatS32:
    170   case NVPTXISD::Tex1DFloatFloat:
    171   case NVPTXISD::Tex1DFloatFloatLevel:
    172   case NVPTXISD::Tex1DFloatFloatGrad:
    173   case NVPTXISD::Tex1DS32S32:
    174   case NVPTXISD::Tex1DS32Float:
    175   case NVPTXISD::Tex1DS32FloatLevel:
    176   case NVPTXISD::Tex1DS32FloatGrad:
    177   case NVPTXISD::Tex1DU32S32:
    178   case NVPTXISD::Tex1DU32Float:
    179   case NVPTXISD::Tex1DU32FloatLevel:
    180   case NVPTXISD::Tex1DU32FloatGrad:
    181   case NVPTXISD::Tex1DArrayFloatS32:
    182   case NVPTXISD::Tex1DArrayFloatFloat:
    183   case NVPTXISD::Tex1DArrayFloatFloatLevel:
    184   case NVPTXISD::Tex1DArrayFloatFloatGrad:
    185   case NVPTXISD::Tex1DArrayS32S32:
    186   case NVPTXISD::Tex1DArrayS32Float:
    187   case NVPTXISD::Tex1DArrayS32FloatLevel:
    188   case NVPTXISD::Tex1DArrayS32FloatGrad:
    189   case NVPTXISD::Tex1DArrayU32S32:
    190   case NVPTXISD::Tex1DArrayU32Float:
    191   case NVPTXISD::Tex1DArrayU32FloatLevel:
    192   case NVPTXISD::Tex1DArrayU32FloatGrad:
    193   case NVPTXISD::Tex2DFloatS32:
    194   case NVPTXISD::Tex2DFloatFloat:
    195   case NVPTXISD::Tex2DFloatFloatLevel:
    196   case NVPTXISD::Tex2DFloatFloatGrad:
    197   case NVPTXISD::Tex2DS32S32:
    198   case NVPTXISD::Tex2DS32Float:
    199   case NVPTXISD::Tex2DS32FloatLevel:
    200   case NVPTXISD::Tex2DS32FloatGrad:
    201   case NVPTXISD::Tex2DU32S32:
    202   case NVPTXISD::Tex2DU32Float:
    203   case NVPTXISD::Tex2DU32FloatLevel:
    204   case NVPTXISD::Tex2DU32FloatGrad:
    205   case NVPTXISD::Tex2DArrayFloatS32:
    206   case NVPTXISD::Tex2DArrayFloatFloat:
    207   case NVPTXISD::Tex2DArrayFloatFloatLevel:
    208   case NVPTXISD::Tex2DArrayFloatFloatGrad:
    209   case NVPTXISD::Tex2DArrayS32S32:
    210   case NVPTXISD::Tex2DArrayS32Float:
    211   case NVPTXISD::Tex2DArrayS32FloatLevel:
    212   case NVPTXISD::Tex2DArrayS32FloatGrad:
    213   case NVPTXISD::Tex2DArrayU32S32:
    214   case NVPTXISD::Tex2DArrayU32Float:
    215   case NVPTXISD::Tex2DArrayU32FloatLevel:
    216   case NVPTXISD::Tex2DArrayU32FloatGrad:
    217   case NVPTXISD::Tex3DFloatS32:
    218   case NVPTXISD::Tex3DFloatFloat:
    219   case NVPTXISD::Tex3DFloatFloatLevel:
    220   case NVPTXISD::Tex3DFloatFloatGrad:
    221   case NVPTXISD::Tex3DS32S32:
    222   case NVPTXISD::Tex3DS32Float:
    223   case NVPTXISD::Tex3DS32FloatLevel:
    224   case NVPTXISD::Tex3DS32FloatGrad:
    225   case NVPTXISD::Tex3DU32S32:
    226   case NVPTXISD::Tex3DU32Float:
    227   case NVPTXISD::Tex3DU32FloatLevel:
    228   case NVPTXISD::Tex3DU32FloatGrad:
    229   case NVPTXISD::TexCubeFloatFloat:
    230   case NVPTXISD::TexCubeFloatFloatLevel:
    231   case NVPTXISD::TexCubeS32Float:
    232   case NVPTXISD::TexCubeS32FloatLevel:
    233   case NVPTXISD::TexCubeU32Float:
    234   case NVPTXISD::TexCubeU32FloatLevel:
    235   case NVPTXISD::TexCubeArrayFloatFloat:
    236   case NVPTXISD::TexCubeArrayFloatFloatLevel:
    237   case NVPTXISD::TexCubeArrayS32Float:
    238   case NVPTXISD::TexCubeArrayS32FloatLevel:
    239   case NVPTXISD::TexCubeArrayU32Float:
    240   case NVPTXISD::TexCubeArrayU32FloatLevel:
    241   case NVPTXISD::Tld4R2DFloatFloat:
    242   case NVPTXISD::Tld4G2DFloatFloat:
    243   case NVPTXISD::Tld4B2DFloatFloat:
    244   case NVPTXISD::Tld4A2DFloatFloat:
    245   case NVPTXISD::Tld4R2DS64Float:
    246   case NVPTXISD::Tld4G2DS64Float:
    247   case NVPTXISD::Tld4B2DS64Float:
    248   case NVPTXISD::Tld4A2DS64Float:
    249   case NVPTXISD::Tld4R2DU64Float:
    250   case NVPTXISD::Tld4G2DU64Float:
    251   case NVPTXISD::Tld4B2DU64Float:
    252   case NVPTXISD::Tld4A2DU64Float:
    253   case NVPTXISD::TexUnified1DFloatS32:
    254   case NVPTXISD::TexUnified1DFloatFloat:
    255   case NVPTXISD::TexUnified1DFloatFloatLevel:
    256   case NVPTXISD::TexUnified1DFloatFloatGrad:
    257   case NVPTXISD::TexUnified1DS32S32:
    258   case NVPTXISD::TexUnified1DS32Float:
    259   case NVPTXISD::TexUnified1DS32FloatLevel:
    260   case NVPTXISD::TexUnified1DS32FloatGrad:
    261   case NVPTXISD::TexUnified1DU32S32:
    262   case NVPTXISD::TexUnified1DU32Float:
    263   case NVPTXISD::TexUnified1DU32FloatLevel:
    264   case NVPTXISD::TexUnified1DU32FloatGrad:
    265   case NVPTXISD::TexUnified1DArrayFloatS32:
    266   case NVPTXISD::TexUnified1DArrayFloatFloat:
    267   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    268   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    269   case NVPTXISD::TexUnified1DArrayS32S32:
    270   case NVPTXISD::TexUnified1DArrayS32Float:
    271   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    272   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    273   case NVPTXISD::TexUnified1DArrayU32S32:
    274   case NVPTXISD::TexUnified1DArrayU32Float:
    275   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    276   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    277   case NVPTXISD::TexUnified2DFloatS32:
    278   case NVPTXISD::TexUnified2DFloatFloat:
    279   case NVPTXISD::TexUnified2DFloatFloatLevel:
    280   case NVPTXISD::TexUnified2DFloatFloatGrad:
    281   case NVPTXISD::TexUnified2DS32S32:
    282   case NVPTXISD::TexUnified2DS32Float:
    283   case NVPTXISD::TexUnified2DS32FloatLevel:
    284   case NVPTXISD::TexUnified2DS32FloatGrad:
    285   case NVPTXISD::TexUnified2DU32S32:
    286   case NVPTXISD::TexUnified2DU32Float:
    287   case NVPTXISD::TexUnified2DU32FloatLevel:
    288   case NVPTXISD::TexUnified2DU32FloatGrad:
    289   case NVPTXISD::TexUnified2DArrayFloatS32:
    290   case NVPTXISD::TexUnified2DArrayFloatFloat:
    291   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    292   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    293   case NVPTXISD::TexUnified2DArrayS32S32:
    294   case NVPTXISD::TexUnified2DArrayS32Float:
    295   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    296   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    297   case NVPTXISD::TexUnified2DArrayU32S32:
    298   case NVPTXISD::TexUnified2DArrayU32Float:
    299   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    300   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    301   case NVPTXISD::TexUnified3DFloatS32:
    302   case NVPTXISD::TexUnified3DFloatFloat:
    303   case NVPTXISD::TexUnified3DFloatFloatLevel:
    304   case NVPTXISD::TexUnified3DFloatFloatGrad:
    305   case NVPTXISD::TexUnified3DS32S32:
    306   case NVPTXISD::TexUnified3DS32Float:
    307   case NVPTXISD::TexUnified3DS32FloatLevel:
    308   case NVPTXISD::TexUnified3DS32FloatGrad:
    309   case NVPTXISD::TexUnified3DU32S32:
    310   case NVPTXISD::TexUnified3DU32Float:
    311   case NVPTXISD::TexUnified3DU32FloatLevel:
    312   case NVPTXISD::TexUnified3DU32FloatGrad:
    313   case NVPTXISD::TexUnifiedCubeFloatFloat:
    314   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    315   case NVPTXISD::TexUnifiedCubeS32Float:
    316   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    317   case NVPTXISD::TexUnifiedCubeU32Float:
    318   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    319   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    320   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    321   case NVPTXISD::TexUnifiedCubeArrayS32Float:
    322   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    323   case NVPTXISD::TexUnifiedCubeArrayU32Float:
    324   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    325   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    326   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    327   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    328   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    329   case NVPTXISD::Tld4UnifiedR2DS64Float:
    330   case NVPTXISD::Tld4UnifiedG2DS64Float:
    331   case NVPTXISD::Tld4UnifiedB2DS64Float:
    332   case NVPTXISD::Tld4UnifiedA2DS64Float:
    333   case NVPTXISD::Tld4UnifiedR2DU64Float:
    334   case NVPTXISD::Tld4UnifiedG2DU64Float:
    335   case NVPTXISD::Tld4UnifiedB2DU64Float:
    336   case NVPTXISD::Tld4UnifiedA2DU64Float:
    337     if (tryTextureIntrinsic(N))
    338       return;
    339     break;
    340   case NVPTXISD::Suld1DI8Clamp:
    341   case NVPTXISD::Suld1DI16Clamp:
    342   case NVPTXISD::Suld1DI32Clamp:
    343   case NVPTXISD::Suld1DI64Clamp:
    344   case NVPTXISD::Suld1DV2I8Clamp:
    345   case NVPTXISD::Suld1DV2I16Clamp:
    346   case NVPTXISD::Suld1DV2I32Clamp:
    347   case NVPTXISD::Suld1DV2I64Clamp:
    348   case NVPTXISD::Suld1DV4I8Clamp:
    349   case NVPTXISD::Suld1DV4I16Clamp:
    350   case NVPTXISD::Suld1DV4I32Clamp:
    351   case NVPTXISD::Suld1DArrayI8Clamp:
    352   case NVPTXISD::Suld1DArrayI16Clamp:
    353   case NVPTXISD::Suld1DArrayI32Clamp:
    354   case NVPTXISD::Suld1DArrayI64Clamp:
    355   case NVPTXISD::Suld1DArrayV2I8Clamp:
    356   case NVPTXISD::Suld1DArrayV2I16Clamp:
    357   case NVPTXISD::Suld1DArrayV2I32Clamp:
    358   case NVPTXISD::Suld1DArrayV2I64Clamp:
    359   case NVPTXISD::Suld1DArrayV4I8Clamp:
    360   case NVPTXISD::Suld1DArrayV4I16Clamp:
    361   case NVPTXISD::Suld1DArrayV4I32Clamp:
    362   case NVPTXISD::Suld2DI8Clamp:
    363   case NVPTXISD::Suld2DI16Clamp:
    364   case NVPTXISD::Suld2DI32Clamp:
    365   case NVPTXISD::Suld2DI64Clamp:
    366   case NVPTXISD::Suld2DV2I8Clamp:
    367   case NVPTXISD::Suld2DV2I16Clamp:
    368   case NVPTXISD::Suld2DV2I32Clamp:
    369   case NVPTXISD::Suld2DV2I64Clamp:
    370   case NVPTXISD::Suld2DV4I8Clamp:
    371   case NVPTXISD::Suld2DV4I16Clamp:
    372   case NVPTXISD::Suld2DV4I32Clamp:
    373   case NVPTXISD::Suld2DArrayI8Clamp:
    374   case NVPTXISD::Suld2DArrayI16Clamp:
    375   case NVPTXISD::Suld2DArrayI32Clamp:
    376   case NVPTXISD::Suld2DArrayI64Clamp:
    377   case NVPTXISD::Suld2DArrayV2I8Clamp:
    378   case NVPTXISD::Suld2DArrayV2I16Clamp:
    379   case NVPTXISD::Suld2DArrayV2I32Clamp:
    380   case NVPTXISD::Suld2DArrayV2I64Clamp:
    381   case NVPTXISD::Suld2DArrayV4I8Clamp:
    382   case NVPTXISD::Suld2DArrayV4I16Clamp:
    383   case NVPTXISD::Suld2DArrayV4I32Clamp:
    384   case NVPTXISD::Suld3DI8Clamp:
    385   case NVPTXISD::Suld3DI16Clamp:
    386   case NVPTXISD::Suld3DI32Clamp:
    387   case NVPTXISD::Suld3DI64Clamp:
    388   case NVPTXISD::Suld3DV2I8Clamp:
    389   case NVPTXISD::Suld3DV2I16Clamp:
    390   case NVPTXISD::Suld3DV2I32Clamp:
    391   case NVPTXISD::Suld3DV2I64Clamp:
    392   case NVPTXISD::Suld3DV4I8Clamp:
    393   case NVPTXISD::Suld3DV4I16Clamp:
    394   case NVPTXISD::Suld3DV4I32Clamp:
    395   case NVPTXISD::Suld1DI8Trap:
    396   case NVPTXISD::Suld1DI16Trap:
    397   case NVPTXISD::Suld1DI32Trap:
    398   case NVPTXISD::Suld1DI64Trap:
    399   case NVPTXISD::Suld1DV2I8Trap:
    400   case NVPTXISD::Suld1DV2I16Trap:
    401   case NVPTXISD::Suld1DV2I32Trap:
    402   case NVPTXISD::Suld1DV2I64Trap:
    403   case NVPTXISD::Suld1DV4I8Trap:
    404   case NVPTXISD::Suld1DV4I16Trap:
    405   case NVPTXISD::Suld1DV4I32Trap:
    406   case NVPTXISD::Suld1DArrayI8Trap:
    407   case NVPTXISD::Suld1DArrayI16Trap:
    408   case NVPTXISD::Suld1DArrayI32Trap:
    409   case NVPTXISD::Suld1DArrayI64Trap:
    410   case NVPTXISD::Suld1DArrayV2I8Trap:
    411   case NVPTXISD::Suld1DArrayV2I16Trap:
    412   case NVPTXISD::Suld1DArrayV2I32Trap:
    413   case NVPTXISD::Suld1DArrayV2I64Trap:
    414   case NVPTXISD::Suld1DArrayV4I8Trap:
    415   case NVPTXISD::Suld1DArrayV4I16Trap:
    416   case NVPTXISD::Suld1DArrayV4I32Trap:
    417   case NVPTXISD::Suld2DI8Trap:
    418   case NVPTXISD::Suld2DI16Trap:
    419   case NVPTXISD::Suld2DI32Trap:
    420   case NVPTXISD::Suld2DI64Trap:
    421   case NVPTXISD::Suld2DV2I8Trap:
    422   case NVPTXISD::Suld2DV2I16Trap:
    423   case NVPTXISD::Suld2DV2I32Trap:
    424   case NVPTXISD::Suld2DV2I64Trap:
    425   case NVPTXISD::Suld2DV4I8Trap:
    426   case NVPTXISD::Suld2DV4I16Trap:
    427   case NVPTXISD::Suld2DV4I32Trap:
    428   case NVPTXISD::Suld2DArrayI8Trap:
    429   case NVPTXISD::Suld2DArrayI16Trap:
    430   case NVPTXISD::Suld2DArrayI32Trap:
    431   case NVPTXISD::Suld2DArrayI64Trap:
    432   case NVPTXISD::Suld2DArrayV2I8Trap:
    433   case NVPTXISD::Suld2DArrayV2I16Trap:
    434   case NVPTXISD::Suld2DArrayV2I32Trap:
    435   case NVPTXISD::Suld2DArrayV2I64Trap:
    436   case NVPTXISD::Suld2DArrayV4I8Trap:
    437   case NVPTXISD::Suld2DArrayV4I16Trap:
    438   case NVPTXISD::Suld2DArrayV4I32Trap:
    439   case NVPTXISD::Suld3DI8Trap:
    440   case NVPTXISD::Suld3DI16Trap:
    441   case NVPTXISD::Suld3DI32Trap:
    442   case NVPTXISD::Suld3DI64Trap:
    443   case NVPTXISD::Suld3DV2I8Trap:
    444   case NVPTXISD::Suld3DV2I16Trap:
    445   case NVPTXISD::Suld3DV2I32Trap:
    446   case NVPTXISD::Suld3DV2I64Trap:
    447   case NVPTXISD::Suld3DV4I8Trap:
    448   case NVPTXISD::Suld3DV4I16Trap:
    449   case NVPTXISD::Suld3DV4I32Trap:
    450   case NVPTXISD::Suld1DI8Zero:
    451   case NVPTXISD::Suld1DI16Zero:
    452   case NVPTXISD::Suld1DI32Zero:
    453   case NVPTXISD::Suld1DI64Zero:
    454   case NVPTXISD::Suld1DV2I8Zero:
    455   case NVPTXISD::Suld1DV2I16Zero:
    456   case NVPTXISD::Suld1DV2I32Zero:
    457   case NVPTXISD::Suld1DV2I64Zero:
    458   case NVPTXISD::Suld1DV4I8Zero:
    459   case NVPTXISD::Suld1DV4I16Zero:
    460   case NVPTXISD::Suld1DV4I32Zero:
    461   case NVPTXISD::Suld1DArrayI8Zero:
    462   case NVPTXISD::Suld1DArrayI16Zero:
    463   case NVPTXISD::Suld1DArrayI32Zero:
    464   case NVPTXISD::Suld1DArrayI64Zero:
    465   case NVPTXISD::Suld1DArrayV2I8Zero:
    466   case NVPTXISD::Suld1DArrayV2I16Zero:
    467   case NVPTXISD::Suld1DArrayV2I32Zero:
    468   case NVPTXISD::Suld1DArrayV2I64Zero:
    469   case NVPTXISD::Suld1DArrayV4I8Zero:
    470   case NVPTXISD::Suld1DArrayV4I16Zero:
    471   case NVPTXISD::Suld1DArrayV4I32Zero:
    472   case NVPTXISD::Suld2DI8Zero:
    473   case NVPTXISD::Suld2DI16Zero:
    474   case NVPTXISD::Suld2DI32Zero:
    475   case NVPTXISD::Suld2DI64Zero:
    476   case NVPTXISD::Suld2DV2I8Zero:
    477   case NVPTXISD::Suld2DV2I16Zero:
    478   case NVPTXISD::Suld2DV2I32Zero:
    479   case NVPTXISD::Suld2DV2I64Zero:
    480   case NVPTXISD::Suld2DV4I8Zero:
    481   case NVPTXISD::Suld2DV4I16Zero:
    482   case NVPTXISD::Suld2DV4I32Zero:
    483   case NVPTXISD::Suld2DArrayI8Zero:
    484   case NVPTXISD::Suld2DArrayI16Zero:
    485   case NVPTXISD::Suld2DArrayI32Zero:
    486   case NVPTXISD::Suld2DArrayI64Zero:
    487   case NVPTXISD::Suld2DArrayV2I8Zero:
    488   case NVPTXISD::Suld2DArrayV2I16Zero:
    489   case NVPTXISD::Suld2DArrayV2I32Zero:
    490   case NVPTXISD::Suld2DArrayV2I64Zero:
    491   case NVPTXISD::Suld2DArrayV4I8Zero:
    492   case NVPTXISD::Suld2DArrayV4I16Zero:
    493   case NVPTXISD::Suld2DArrayV4I32Zero:
    494   case NVPTXISD::Suld3DI8Zero:
    495   case NVPTXISD::Suld3DI16Zero:
    496   case NVPTXISD::Suld3DI32Zero:
    497   case NVPTXISD::Suld3DI64Zero:
    498   case NVPTXISD::Suld3DV2I8Zero:
    499   case NVPTXISD::Suld3DV2I16Zero:
    500   case NVPTXISD::Suld3DV2I32Zero:
    501   case NVPTXISD::Suld3DV2I64Zero:
    502   case NVPTXISD::Suld3DV4I8Zero:
    503   case NVPTXISD::Suld3DV4I16Zero:
    504   case NVPTXISD::Suld3DV4I32Zero:
    505     if (trySurfaceIntrinsic(N))
    506       return;
    507     break;
    508   case ISD::AND:
    509   case ISD::SRA:
    510   case ISD::SRL:
    511     // Try to select BFE
    512     if (tryBFE(N))
    513       return;
    514     break;
    515   case ISD::ADDRSPACECAST:
    516     SelectAddrSpaceCast(N);
    517     return;
    518   default:
    519     break;
    520   }
    521   SelectCode(N);
    522 }
    523 
    524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
    525   unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    526   switch (IID) {
    527   default:
    528     return false;
    529   case Intrinsic::nvvm_ldg_global_f:
    530   case Intrinsic::nvvm_ldg_global_i:
    531   case Intrinsic::nvvm_ldg_global_p:
    532   case Intrinsic::nvvm_ldu_global_f:
    533   case Intrinsic::nvvm_ldu_global_i:
    534   case Intrinsic::nvvm_ldu_global_p:
    535     return tryLDGLDU(N);
    536   }
    537 }
    538 
    539 static unsigned int getCodeAddrSpace(MemSDNode *N) {
    540   const Value *Src = N->getMemOperand()->getValue();
    541 
    542   if (!Src)
    543     return NVPTX::PTXLdStInstCode::GENERIC;
    544 
    545   if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
    546     switch (PT->getAddressSpace()) {
    547     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    548     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    549     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    550     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    551     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    552     case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    553     default: break;
    554     }
    555   }
    556   return NVPTX::PTXLdStInstCode::GENERIC;
    557 }
    558 
    559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
    560                           unsigned CodeAddrSpace, MachineFunction *F) {
    561   // To use non-coherent caching, the load has to be from global
    562   // memory and we have to prove that the memory area is not written
    563   // to anywhere for the duration of the kernel call, not even after
    564   // the load.
    565   //
    566   // To ensure that there are no writes to the memory, we require the
    567   // underlying pointer to be a noalias (__restrict) kernel parameter
    568   // that is never used for a write. We can only do this for kernel
    569   // functions since from within a device function, we cannot know if
    570   // there were or will be writes to the memory from the caller - or we
    571   // could, but then we would have to do inter-procedural analysis.
    572   if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
    573       !isKernelFunction(*F->getFunction())) {
    574     return false;
    575   }
    576 
    577   // We use GetUnderlyingObjects() here instead of
    578   // GetUnderlyingObject() mainly because the former looks through phi
    579   // nodes while the latter does not. We need to look through phi
    580   // nodes to handle pointer induction variables.
    581   SmallVector<Value *, 8> Objs;
    582   GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
    583                        Objs, F->getDataLayout());
    584   for (Value *Obj : Objs) {
    585     auto *A = dyn_cast<const Argument>(Obj);
    586     if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
    587   }
    588 
    589   return true;
    590 }
    591 
    592 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
    593   unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    594   switch (IID) {
    595   default:
    596     return false;
    597   case Intrinsic::nvvm_texsurf_handle_internal:
    598     SelectTexSurfHandle(N);
    599     return true;
    600   }
    601 }
    602 
    603 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
    604   // Op 0 is the intrinsic ID
    605   SDValue Wrapper = N->getOperand(1);
    606   SDValue GlobalVal = Wrapper.getOperand(0);
    607   ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
    608                                         MVT::i64, GlobalVal));
    609 }
    610 
    611 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
    612   SDValue Src = N->getOperand(0);
    613   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
    614   unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
    615   unsigned DstAddrSpace = CastN->getDestAddressSpace();
    616 
    617   assert(SrcAddrSpace != DstAddrSpace &&
    618          "addrspacecast must be between different address spaces");
    619 
    620   if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    621     // Specific to generic
    622     unsigned Opc;
    623     switch (SrcAddrSpace) {
    624     default: report_fatal_error("Bad address space in addrspacecast");
    625     case ADDRESS_SPACE_GLOBAL:
    626       Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
    627       break;
    628     case ADDRESS_SPACE_SHARED:
    629       Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
    630       break;
    631     case ADDRESS_SPACE_CONST:
    632       Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
    633       break;
    634     case ADDRESS_SPACE_LOCAL:
    635       Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
    636       break;
    637     }
    638     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
    639                                           Src));
    640     return;
    641   } else {
    642     // Generic to specific
    643     if (SrcAddrSpace != 0)
    644       report_fatal_error("Cannot cast between two non-generic address spaces");
    645     unsigned Opc;
    646     switch (DstAddrSpace) {
    647     default: report_fatal_error("Bad address space in addrspacecast");
    648     case ADDRESS_SPACE_GLOBAL:
    649       Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
    650                          : NVPTX::cvta_to_global_yes;
    651       break;
    652     case ADDRESS_SPACE_SHARED:
    653       Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
    654                          : NVPTX::cvta_to_shared_yes;
    655       break;
    656     case ADDRESS_SPACE_CONST:
    657       Opc =
    658           TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
    659       break;
    660     case ADDRESS_SPACE_LOCAL:
    661       Opc =
    662           TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
    663       break;
    664     case ADDRESS_SPACE_PARAM:
    665       Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
    666                          : NVPTX::nvvm_ptr_gen_to_param;
    667       break;
    668     }
    669     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
    670                                           Src));
    671     return;
    672   }
    673 }
    674 
    675 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
    676   SDLoc dl(N);
    677   LoadSDNode *LD = cast<LoadSDNode>(N);
    678   EVT LoadedVT = LD->getMemoryVT();
    679   SDNode *NVPTXLD = nullptr;
    680 
    681   // do not support pre/post inc/dec
    682   if (LD->isIndexed())
    683     return false;
    684 
    685   if (!LoadedVT.isSimple())
    686     return false;
    687 
    688   // Address Space Setting
    689   unsigned int codeAddrSpace = getCodeAddrSpace(LD);
    690 
    691   if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
    692     return tryLDGLDU(N);
    693   }
    694 
    695   // Volatile Setting
    696   // - .volatile is only availalble for .global and .shared
    697   bool isVolatile = LD->isVolatile();
    698   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    699       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    700       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    701     isVolatile = false;
    702 
    703   // Vector Setting
    704   MVT SimpleVT = LoadedVT.getSimpleVT();
    705   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    706   if (SimpleVT.isVector()) {
    707     unsigned num = SimpleVT.getVectorNumElements();
    708     if (num == 2)
    709       vecType = NVPTX::PTXLdStInstCode::V2;
    710     else if (num == 4)
    711       vecType = NVPTX::PTXLdStInstCode::V4;
    712     else
    713       return false;
    714   }
    715 
    716   // Type Setting: fromType + fromTypeWidth
    717   //
    718   // Sign   : ISD::SEXTLOAD
    719   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    720   //          type is integer
    721   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    722   MVT ScalarVT = SimpleVT.getScalarType();
    723   // Read at least 8 bits (predicates are stored as 8-bit values)
    724   unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    725   unsigned int fromType;
    726   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    727     fromType = NVPTX::PTXLdStInstCode::Signed;
    728   else if (ScalarVT.isFloatingPoint())
    729     fromType = NVPTX::PTXLdStInstCode::Float;
    730   else
    731     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    732 
    733   // Create the machine instruction DAG
    734   SDValue Chain = N->getOperand(0);
    735   SDValue N1 = N->getOperand(1);
    736   SDValue Addr;
    737   SDValue Offset, Base;
    738   unsigned Opcode;
    739   MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
    740 
    741   if (SelectDirectAddr(N1, Addr)) {
    742     switch (TargetVT) {
    743     case MVT::i8:
    744       Opcode = NVPTX::LD_i8_avar;
    745       break;
    746     case MVT::i16:
    747       Opcode = NVPTX::LD_i16_avar;
    748       break;
    749     case MVT::i32:
    750       Opcode = NVPTX::LD_i32_avar;
    751       break;
    752     case MVT::i64:
    753       Opcode = NVPTX::LD_i64_avar;
    754       break;
    755     case MVT::f32:
    756       Opcode = NVPTX::LD_f32_avar;
    757       break;
    758     case MVT::f64:
    759       Opcode = NVPTX::LD_f64_avar;
    760       break;
    761     default:
    762       return false;
    763     }
    764     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    765                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    766                       getI32Imm(fromTypeWidth, dl), Addr, Chain };
    767     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    768   } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
    769                           : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    770     switch (TargetVT) {
    771     case MVT::i8:
    772       Opcode = NVPTX::LD_i8_asi;
    773       break;
    774     case MVT::i16:
    775       Opcode = NVPTX::LD_i16_asi;
    776       break;
    777     case MVT::i32:
    778       Opcode = NVPTX::LD_i32_asi;
    779       break;
    780     case MVT::i64:
    781       Opcode = NVPTX::LD_i64_asi;
    782       break;
    783     case MVT::f32:
    784       Opcode = NVPTX::LD_f32_asi;
    785       break;
    786     case MVT::f64:
    787       Opcode = NVPTX::LD_f64_asi;
    788       break;
    789     default:
    790       return false;
    791     }
    792     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    793                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    794                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    795     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    796   } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
    797                           : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    798     if (TM.is64Bit()) {
    799       switch (TargetVT) {
    800       case MVT::i8:
    801         Opcode = NVPTX::LD_i8_ari_64;
    802         break;
    803       case MVT::i16:
    804         Opcode = NVPTX::LD_i16_ari_64;
    805         break;
    806       case MVT::i32:
    807         Opcode = NVPTX::LD_i32_ari_64;
    808         break;
    809       case MVT::i64:
    810         Opcode = NVPTX::LD_i64_ari_64;
    811         break;
    812       case MVT::f32:
    813         Opcode = NVPTX::LD_f32_ari_64;
    814         break;
    815       case MVT::f64:
    816         Opcode = NVPTX::LD_f64_ari_64;
    817         break;
    818       default:
    819         return false;
    820       }
    821     } else {
    822       switch (TargetVT) {
    823       case MVT::i8:
    824         Opcode = NVPTX::LD_i8_ari;
    825         break;
    826       case MVT::i16:
    827         Opcode = NVPTX::LD_i16_ari;
    828         break;
    829       case MVT::i32:
    830         Opcode = NVPTX::LD_i32_ari;
    831         break;
    832       case MVT::i64:
    833         Opcode = NVPTX::LD_i64_ari;
    834         break;
    835       case MVT::f32:
    836         Opcode = NVPTX::LD_f32_ari;
    837         break;
    838       case MVT::f64:
    839         Opcode = NVPTX::LD_f64_ari;
    840         break;
    841       default:
    842         return false;
    843       }
    844     }
    845     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    846                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    847                       getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
    848     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    849   } else {
    850     if (TM.is64Bit()) {
    851       switch (TargetVT) {
    852       case MVT::i8:
    853         Opcode = NVPTX::LD_i8_areg_64;
    854         break;
    855       case MVT::i16:
    856         Opcode = NVPTX::LD_i16_areg_64;
    857         break;
    858       case MVT::i32:
    859         Opcode = NVPTX::LD_i32_areg_64;
    860         break;
    861       case MVT::i64:
    862         Opcode = NVPTX::LD_i64_areg_64;
    863         break;
    864       case MVT::f32:
    865         Opcode = NVPTX::LD_f32_areg_64;
    866         break;
    867       case MVT::f64:
    868         Opcode = NVPTX::LD_f64_areg_64;
    869         break;
    870       default:
    871         return false;
    872       }
    873     } else {
    874       switch (TargetVT) {
    875       case MVT::i8:
    876         Opcode = NVPTX::LD_i8_areg;
    877         break;
    878       case MVT::i16:
    879         Opcode = NVPTX::LD_i16_areg;
    880         break;
    881       case MVT::i32:
    882         Opcode = NVPTX::LD_i32_areg;
    883         break;
    884       case MVT::i64:
    885         Opcode = NVPTX::LD_i64_areg;
    886         break;
    887       case MVT::f32:
    888         Opcode = NVPTX::LD_f32_areg;
    889         break;
    890       case MVT::f64:
    891         Opcode = NVPTX::LD_f64_areg;
    892         break;
    893       default:
    894         return false;
    895       }
    896     }
    897     SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
    898                       getI32Imm(vecType, dl), getI32Imm(fromType, dl),
    899                       getI32Imm(fromTypeWidth, dl), N1, Chain };
    900     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
    901   }
    902 
    903   if (!NVPTXLD)
    904     return false;
    905 
    906   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    907   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    908   cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    909 
    910   ReplaceNode(N, NVPTXLD);
    911   return true;
    912 }
    913 
    914 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
    915 
    916   SDValue Chain = N->getOperand(0);
    917   SDValue Op1 = N->getOperand(1);
    918   SDValue Addr, Offset, Base;
    919   unsigned Opcode;
    920   SDLoc DL(N);
    921   SDNode *LD;
    922   MemSDNode *MemSD = cast<MemSDNode>(N);
    923   EVT LoadedVT = MemSD->getMemoryVT();
    924 
    925   if (!LoadedVT.isSimple())
    926     return false;
    927 
    928   // Address Space Setting
    929   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
    930 
    931   if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
    932     return tryLDGLDU(N);
    933   }
    934 
    935   // Volatile Setting
    936   // - .volatile is only availalble for .global and .shared
    937   bool IsVolatile = MemSD->isVolatile();
    938   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    939       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    940       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    941     IsVolatile = false;
    942 
    943   // Vector Setting
    944   MVT SimpleVT = LoadedVT.getSimpleVT();
    945 
    946   // Type Setting: fromType + fromTypeWidth
    947   //
    948   // Sign   : ISD::SEXTLOAD
    949   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    950   //          type is integer
    951   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    952   MVT ScalarVT = SimpleVT.getScalarType();
    953   // Read at least 8 bits (predicates are stored as 8-bit values)
    954   unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
    955   unsigned int FromType;
    956   // The last operand holds the original LoadSDNode::getExtensionType() value
    957   unsigned ExtensionType = cast<ConstantSDNode>(
    958       N->getOperand(N->getNumOperands() - 1))->getZExtValue();
    959   if (ExtensionType == ISD::SEXTLOAD)
    960     FromType = NVPTX::PTXLdStInstCode::Signed;
    961   else if (ScalarVT.isFloatingPoint())
    962     FromType = NVPTX::PTXLdStInstCode::Float;
    963   else
    964     FromType = NVPTX::PTXLdStInstCode::Unsigned;
    965 
    966   unsigned VecType;
    967 
    968   switch (N->getOpcode()) {
    969   case NVPTXISD::LoadV2:
    970     VecType = NVPTX::PTXLdStInstCode::V2;
    971     break;
    972   case NVPTXISD::LoadV4:
    973     VecType = NVPTX::PTXLdStInstCode::V4;
    974     break;
    975   default:
    976     return false;
    977   }
    978 
    979   EVT EltVT = N->getValueType(0);
    980 
    981   if (SelectDirectAddr(Op1, Addr)) {
    982     switch (N->getOpcode()) {
    983     default:
    984       return false;
    985     case NVPTXISD::LoadV2:
    986       switch (EltVT.getSimpleVT().SimpleTy) {
    987       default:
    988         return false;
    989       case MVT::i8:
    990         Opcode = NVPTX::LDV_i8_v2_avar;
    991         break;
    992       case MVT::i16:
    993         Opcode = NVPTX::LDV_i16_v2_avar;
    994         break;
    995       case MVT::i32:
    996         Opcode = NVPTX::LDV_i32_v2_avar;
    997         break;
    998       case MVT::i64:
    999         Opcode = NVPTX::LDV_i64_v2_avar;
   1000         break;
   1001       case MVT::f32:
   1002         Opcode = NVPTX::LDV_f32_v2_avar;
   1003         break;
   1004       case MVT::f64:
   1005         Opcode = NVPTX::LDV_f64_v2_avar;
   1006         break;
   1007       }
   1008       break;
   1009     case NVPTXISD::LoadV4:
   1010       switch (EltVT.getSimpleVT().SimpleTy) {
   1011       default:
   1012         return false;
   1013       case MVT::i8:
   1014         Opcode = NVPTX::LDV_i8_v4_avar;
   1015         break;
   1016       case MVT::i16:
   1017         Opcode = NVPTX::LDV_i16_v4_avar;
   1018         break;
   1019       case MVT::i32:
   1020         Opcode = NVPTX::LDV_i32_v4_avar;
   1021         break;
   1022       case MVT::f32:
   1023         Opcode = NVPTX::LDV_f32_v4_avar;
   1024         break;
   1025       }
   1026       break;
   1027     }
   1028 
   1029     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1030                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1031                       getI32Imm(FromTypeWidth, DL), Addr, Chain };
   1032     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1033   } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
   1034                           : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
   1035     switch (N->getOpcode()) {
   1036     default:
   1037       return false;
   1038     case NVPTXISD::LoadV2:
   1039       switch (EltVT.getSimpleVT().SimpleTy) {
   1040       default:
   1041         return false;
   1042       case MVT::i8:
   1043         Opcode = NVPTX::LDV_i8_v2_asi;
   1044         break;
   1045       case MVT::i16:
   1046         Opcode = NVPTX::LDV_i16_v2_asi;
   1047         break;
   1048       case MVT::i32:
   1049         Opcode = NVPTX::LDV_i32_v2_asi;
   1050         break;
   1051       case MVT::i64:
   1052         Opcode = NVPTX::LDV_i64_v2_asi;
   1053         break;
   1054       case MVT::f32:
   1055         Opcode = NVPTX::LDV_f32_v2_asi;
   1056         break;
   1057       case MVT::f64:
   1058         Opcode = NVPTX::LDV_f64_v2_asi;
   1059         break;
   1060       }
   1061       break;
   1062     case NVPTXISD::LoadV4:
   1063       switch (EltVT.getSimpleVT().SimpleTy) {
   1064       default:
   1065         return false;
   1066       case MVT::i8:
   1067         Opcode = NVPTX::LDV_i8_v4_asi;
   1068         break;
   1069       case MVT::i16:
   1070         Opcode = NVPTX::LDV_i16_v4_asi;
   1071         break;
   1072       case MVT::i32:
   1073         Opcode = NVPTX::LDV_i32_v4_asi;
   1074         break;
   1075       case MVT::f32:
   1076         Opcode = NVPTX::LDV_f32_v4_asi;
   1077         break;
   1078       }
   1079       break;
   1080     }
   1081 
   1082     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1083                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1084                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
   1085     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1086   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1087                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
   1088     if (TM.is64Bit()) {
   1089       switch (N->getOpcode()) {
   1090       default:
   1091         return false;
   1092       case NVPTXISD::LoadV2:
   1093         switch (EltVT.getSimpleVT().SimpleTy) {
   1094         default:
   1095           return false;
   1096         case MVT::i8:
   1097           Opcode = NVPTX::LDV_i8_v2_ari_64;
   1098           break;
   1099         case MVT::i16:
   1100           Opcode = NVPTX::LDV_i16_v2_ari_64;
   1101           break;
   1102         case MVT::i32:
   1103           Opcode = NVPTX::LDV_i32_v2_ari_64;
   1104           break;
   1105         case MVT::i64:
   1106           Opcode = NVPTX::LDV_i64_v2_ari_64;
   1107           break;
   1108         case MVT::f32:
   1109           Opcode = NVPTX::LDV_f32_v2_ari_64;
   1110           break;
   1111         case MVT::f64:
   1112           Opcode = NVPTX::LDV_f64_v2_ari_64;
   1113           break;
   1114         }
   1115         break;
   1116       case NVPTXISD::LoadV4:
   1117         switch (EltVT.getSimpleVT().SimpleTy) {
   1118         default:
   1119           return false;
   1120         case MVT::i8:
   1121           Opcode = NVPTX::LDV_i8_v4_ari_64;
   1122           break;
   1123         case MVT::i16:
   1124           Opcode = NVPTX::LDV_i16_v4_ari_64;
   1125           break;
   1126         case MVT::i32:
   1127           Opcode = NVPTX::LDV_i32_v4_ari_64;
   1128           break;
   1129         case MVT::f32:
   1130           Opcode = NVPTX::LDV_f32_v4_ari_64;
   1131           break;
   1132         }
   1133         break;
   1134       }
   1135     } else {
   1136       switch (N->getOpcode()) {
   1137       default:
   1138         return false;
   1139       case NVPTXISD::LoadV2:
   1140         switch (EltVT.getSimpleVT().SimpleTy) {
   1141         default:
   1142           return false;
   1143         case MVT::i8:
   1144           Opcode = NVPTX::LDV_i8_v2_ari;
   1145           break;
   1146         case MVT::i16:
   1147           Opcode = NVPTX::LDV_i16_v2_ari;
   1148           break;
   1149         case MVT::i32:
   1150           Opcode = NVPTX::LDV_i32_v2_ari;
   1151           break;
   1152         case MVT::i64:
   1153           Opcode = NVPTX::LDV_i64_v2_ari;
   1154           break;
   1155         case MVT::f32:
   1156           Opcode = NVPTX::LDV_f32_v2_ari;
   1157           break;
   1158         case MVT::f64:
   1159           Opcode = NVPTX::LDV_f64_v2_ari;
   1160           break;
   1161         }
   1162         break;
   1163       case NVPTXISD::LoadV4:
   1164         switch (EltVT.getSimpleVT().SimpleTy) {
   1165         default:
   1166           return false;
   1167         case MVT::i8:
   1168           Opcode = NVPTX::LDV_i8_v4_ari;
   1169           break;
   1170         case MVT::i16:
   1171           Opcode = NVPTX::LDV_i16_v4_ari;
   1172           break;
   1173         case MVT::i32:
   1174           Opcode = NVPTX::LDV_i32_v4_ari;
   1175           break;
   1176         case MVT::f32:
   1177           Opcode = NVPTX::LDV_f32_v4_ari;
   1178           break;
   1179         }
   1180         break;
   1181       }
   1182     }
   1183 
   1184     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1185                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1186                       getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
   1187 
   1188     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1189   } else {
   1190     if (TM.is64Bit()) {
   1191       switch (N->getOpcode()) {
   1192       default:
   1193         return false;
   1194       case NVPTXISD::LoadV2:
   1195         switch (EltVT.getSimpleVT().SimpleTy) {
   1196         default:
   1197           return false;
   1198         case MVT::i8:
   1199           Opcode = NVPTX::LDV_i8_v2_areg_64;
   1200           break;
   1201         case MVT::i16:
   1202           Opcode = NVPTX::LDV_i16_v2_areg_64;
   1203           break;
   1204         case MVT::i32:
   1205           Opcode = NVPTX::LDV_i32_v2_areg_64;
   1206           break;
   1207         case MVT::i64:
   1208           Opcode = NVPTX::LDV_i64_v2_areg_64;
   1209           break;
   1210         case MVT::f32:
   1211           Opcode = NVPTX::LDV_f32_v2_areg_64;
   1212           break;
   1213         case MVT::f64:
   1214           Opcode = NVPTX::LDV_f64_v2_areg_64;
   1215           break;
   1216         }
   1217         break;
   1218       case NVPTXISD::LoadV4:
   1219         switch (EltVT.getSimpleVT().SimpleTy) {
   1220         default:
   1221           return false;
   1222         case MVT::i8:
   1223           Opcode = NVPTX::LDV_i8_v4_areg_64;
   1224           break;
   1225         case MVT::i16:
   1226           Opcode = NVPTX::LDV_i16_v4_areg_64;
   1227           break;
   1228         case MVT::i32:
   1229           Opcode = NVPTX::LDV_i32_v4_areg_64;
   1230           break;
   1231         case MVT::f32:
   1232           Opcode = NVPTX::LDV_f32_v4_areg_64;
   1233           break;
   1234         }
   1235         break;
   1236       }
   1237     } else {
   1238       switch (N->getOpcode()) {
   1239       default:
   1240         return false;
   1241       case NVPTXISD::LoadV2:
   1242         switch (EltVT.getSimpleVT().SimpleTy) {
   1243         default:
   1244           return false;
   1245         case MVT::i8:
   1246           Opcode = NVPTX::LDV_i8_v2_areg;
   1247           break;
   1248         case MVT::i16:
   1249           Opcode = NVPTX::LDV_i16_v2_areg;
   1250           break;
   1251         case MVT::i32:
   1252           Opcode = NVPTX::LDV_i32_v2_areg;
   1253           break;
   1254         case MVT::i64:
   1255           Opcode = NVPTX::LDV_i64_v2_areg;
   1256           break;
   1257         case MVT::f32:
   1258           Opcode = NVPTX::LDV_f32_v2_areg;
   1259           break;
   1260         case MVT::f64:
   1261           Opcode = NVPTX::LDV_f64_v2_areg;
   1262           break;
   1263         }
   1264         break;
   1265       case NVPTXISD::LoadV4:
   1266         switch (EltVT.getSimpleVT().SimpleTy) {
   1267         default:
   1268           return false;
   1269         case MVT::i8:
   1270           Opcode = NVPTX::LDV_i8_v4_areg;
   1271           break;
   1272         case MVT::i16:
   1273           Opcode = NVPTX::LDV_i16_v4_areg;
   1274           break;
   1275         case MVT::i32:
   1276           Opcode = NVPTX::LDV_i32_v4_areg;
   1277           break;
   1278         case MVT::f32:
   1279           Opcode = NVPTX::LDV_f32_v4_areg;
   1280           break;
   1281         }
   1282         break;
   1283       }
   1284     }
   1285 
   1286     SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
   1287                       getI32Imm(VecType, DL), getI32Imm(FromType, DL),
   1288                       getI32Imm(FromTypeWidth, DL), Op1, Chain };
   1289     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   1290   }
   1291 
   1292   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1293   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1294   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1295 
   1296   ReplaceNode(N, LD);
   1297   return true;
   1298 }
   1299 
   1300 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
   1301 
   1302   SDValue Chain = N->getOperand(0);
   1303   SDValue Op1;
   1304   MemSDNode *Mem;
   1305   bool IsLDG = true;
   1306 
   1307   // If this is an LDG intrinsic, the address is the third operand. If its an
   1308   // LDG/LDU SD node (from custom vector handling), then its the second operand
   1309   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
   1310     Op1 = N->getOperand(2);
   1311     Mem = cast<MemIntrinsicSDNode>(N);
   1312     unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   1313     switch (IID) {
   1314     default:
   1315       return false;
   1316     case Intrinsic::nvvm_ldg_global_f:
   1317     case Intrinsic::nvvm_ldg_global_i:
   1318     case Intrinsic::nvvm_ldg_global_p:
   1319       IsLDG = true;
   1320       break;
   1321     case Intrinsic::nvvm_ldu_global_f:
   1322     case Intrinsic::nvvm_ldu_global_i:
   1323     case Intrinsic::nvvm_ldu_global_p:
   1324       IsLDG = false;
   1325       break;
   1326     }
   1327   } else {
   1328     Op1 = N->getOperand(1);
   1329     Mem = cast<MemSDNode>(N);
   1330   }
   1331 
   1332   unsigned Opcode;
   1333   SDLoc DL(N);
   1334   SDNode *LD;
   1335   SDValue Base, Offset, Addr;
   1336 
   1337   EVT EltVT = Mem->getMemoryVT();
   1338   unsigned NumElts = 1;
   1339   if (EltVT.isVector()) {
   1340     NumElts = EltVT.getVectorNumElements();
   1341     EltVT = EltVT.getVectorElementType();
   1342   }
   1343 
   1344   // Build the "promoted" result VTList for the load. If we are really loading
   1345   // i8s, then the return type will be promoted to i16 since we do not expose
   1346   // 8-bit registers in NVPTX.
   1347   EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
   1348   SmallVector<EVT, 5> InstVTs;
   1349   for (unsigned i = 0; i != NumElts; ++i) {
   1350     InstVTs.push_back(NodeVT);
   1351   }
   1352   InstVTs.push_back(MVT::Other);
   1353   SDVTList InstVTList = CurDAG->getVTList(InstVTs);
   1354 
   1355   if (SelectDirectAddr(Op1, Addr)) {
   1356     switch (N->getOpcode()) {
   1357     default:
   1358       return false;
   1359     case ISD::INTRINSIC_W_CHAIN:
   1360       if (IsLDG) {
   1361         switch (EltVT.getSimpleVT().SimpleTy) {
   1362         default:
   1363           return false;
   1364         case MVT::i8:
   1365           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
   1366           break;
   1367         case MVT::i16:
   1368           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
   1369           break;
   1370         case MVT::i32:
   1371           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
   1372           break;
   1373         case MVT::i64:
   1374           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
   1375           break;
   1376         case MVT::f32:
   1377           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
   1378           break;
   1379         case MVT::f64:
   1380           Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
   1381           break;
   1382         }
   1383       } else {
   1384         switch (EltVT.getSimpleVT().SimpleTy) {
   1385         default:
   1386           return false;
   1387         case MVT::i8:
   1388           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
   1389           break;
   1390         case MVT::i16:
   1391           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
   1392           break;
   1393         case MVT::i32:
   1394           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
   1395           break;
   1396         case MVT::i64:
   1397           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
   1398           break;
   1399         case MVT::f32:
   1400           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
   1401           break;
   1402         case MVT::f64:
   1403           Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
   1404           break;
   1405         }
   1406       }
   1407       break;
   1408     case NVPTXISD::LDGV2:
   1409       switch (EltVT.getSimpleVT().SimpleTy) {
   1410       default:
   1411         return false;
   1412       case MVT::i8:
   1413         Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
   1414         break;
   1415       case MVT::i16:
   1416         Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
   1417         break;
   1418       case MVT::i32:
   1419         Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
   1420         break;
   1421       case MVT::i64:
   1422         Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
   1423         break;
   1424       case MVT::f32:
   1425         Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
   1426         break;
   1427       case MVT::f64:
   1428         Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
   1429         break;
   1430       }
   1431       break;
   1432     case NVPTXISD::LDUV2:
   1433       switch (EltVT.getSimpleVT().SimpleTy) {
   1434       default:
   1435         return false;
   1436       case MVT::i8:
   1437         Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
   1438         break;
   1439       case MVT::i16:
   1440         Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
   1441         break;
   1442       case MVT::i32:
   1443         Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
   1444         break;
   1445       case MVT::i64:
   1446         Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
   1447         break;
   1448       case MVT::f32:
   1449         Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
   1450         break;
   1451       case MVT::f64:
   1452         Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
   1453         break;
   1454       }
   1455       break;
   1456     case NVPTXISD::LDGV4:
   1457       switch (EltVT.getSimpleVT().SimpleTy) {
   1458       default:
   1459         return false;
   1460       case MVT::i8:
   1461         Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
   1462         break;
   1463       case MVT::i16:
   1464         Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
   1465         break;
   1466       case MVT::i32:
   1467         Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
   1468         break;
   1469       case MVT::f32:
   1470         Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
   1471         break;
   1472       }
   1473       break;
   1474     case NVPTXISD::LDUV4:
   1475       switch (EltVT.getSimpleVT().SimpleTy) {
   1476       default:
   1477         return false;
   1478       case MVT::i8:
   1479         Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
   1480         break;
   1481       case MVT::i16:
   1482         Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
   1483         break;
   1484       case MVT::i32:
   1485         Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
   1486         break;
   1487       case MVT::f32:
   1488         Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
   1489         break;
   1490       }
   1491       break;
   1492     }
   1493 
   1494     SDValue Ops[] = { Addr, Chain };
   1495     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
   1496   } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
   1497                           : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
   1498     if (TM.is64Bit()) {
   1499       switch (N->getOpcode()) {
   1500       default:
   1501         return false;
   1502       case ISD::LOAD:
   1503       case ISD::INTRINSIC_W_CHAIN:
   1504         if (IsLDG) {
   1505           switch (EltVT.getSimpleVT().SimpleTy) {
   1506           default:
   1507             return false;
   1508           case MVT::i8:
   1509             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
   1510             break;
   1511           case MVT::i16:
   1512             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
   1513             break;
   1514           case MVT::i32:
   1515             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
   1516             break;
   1517           case MVT::i64:
   1518             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
   1519             break;
   1520           case MVT::f32:
   1521             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
   1522             break;
   1523           case MVT::f64:
   1524             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
   1525             break;
   1526           }
   1527         } else {
   1528           switch (EltVT.getSimpleVT().SimpleTy) {
   1529           default:
   1530             return false;
   1531           case MVT::i8:
   1532             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
   1533             break;
   1534           case MVT::i16:
   1535             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
   1536             break;
   1537           case MVT::i32:
   1538             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
   1539             break;
   1540           case MVT::i64:
   1541             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
   1542             break;
   1543           case MVT::f32:
   1544             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
   1545             break;
   1546           case MVT::f64:
   1547             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
   1548             break;
   1549           }
   1550         }
   1551         break;
   1552       case NVPTXISD::LoadV2:
   1553       case NVPTXISD::LDGV2:
   1554         switch (EltVT.getSimpleVT().SimpleTy) {
   1555         default:
   1556           return false;
   1557         case MVT::i8:
   1558           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
   1559           break;
   1560         case MVT::i16:
   1561           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
   1562           break;
   1563         case MVT::i32:
   1564           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
   1565           break;
   1566         case MVT::i64:
   1567           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
   1568           break;
   1569         case MVT::f32:
   1570           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
   1571           break;
   1572         case MVT::f64:
   1573           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
   1574           break;
   1575         }
   1576         break;
   1577       case NVPTXISD::LDUV2:
   1578         switch (EltVT.getSimpleVT().SimpleTy) {
   1579         default:
   1580           return false;
   1581         case MVT::i8:
   1582           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
   1583           break;
   1584         case MVT::i16:
   1585           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
   1586           break;
   1587         case MVT::i32:
   1588           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
   1589           break;
   1590         case MVT::i64:
   1591           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
   1592           break;
   1593         case MVT::f32:
   1594           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
   1595           break;
   1596         case MVT::f64:
   1597           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
   1598           break;
   1599         }
   1600         break;
   1601       case NVPTXISD::LoadV4:
   1602       case NVPTXISD::LDGV4:
   1603         switch (EltVT.getSimpleVT().SimpleTy) {
   1604         default:
   1605           return false;
   1606         case MVT::i8:
   1607           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
   1608           break;
   1609         case MVT::i16:
   1610           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
   1611           break;
   1612         case MVT::i32:
   1613           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
   1614           break;
   1615         case MVT::f32:
   1616           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
   1617           break;
   1618         }
   1619         break;
   1620       case NVPTXISD::LDUV4:
   1621         switch (EltVT.getSimpleVT().SimpleTy) {
   1622         default:
   1623           return false;
   1624         case MVT::i8:
   1625           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
   1626           break;
   1627         case MVT::i16:
   1628           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
   1629           break;
   1630         case MVT::i32:
   1631           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
   1632           break;
   1633         case MVT::f32:
   1634           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
   1635           break;
   1636         }
   1637         break;
   1638       }
   1639     } else {
   1640       switch (N->getOpcode()) {
   1641       default:
   1642         return false;
   1643       case ISD::LOAD:
   1644       case ISD::INTRINSIC_W_CHAIN:
   1645         if (IsLDG) {
   1646           switch (EltVT.getSimpleVT().SimpleTy) {
   1647           default:
   1648             return false;
   1649           case MVT::i8:
   1650             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
   1651             break;
   1652           case MVT::i16:
   1653             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
   1654             break;
   1655           case MVT::i32:
   1656             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
   1657             break;
   1658           case MVT::i64:
   1659             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
   1660             break;
   1661           case MVT::f32:
   1662             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
   1663             break;
   1664           case MVT::f64:
   1665             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
   1666             break;
   1667           }
   1668         } else {
   1669           switch (EltVT.getSimpleVT().SimpleTy) {
   1670           default:
   1671             return false;
   1672           case MVT::i8:
   1673             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
   1674             break;
   1675           case MVT::i16:
   1676             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
   1677             break;
   1678           case MVT::i32:
   1679             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
   1680             break;
   1681           case MVT::i64:
   1682             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
   1683             break;
   1684           case MVT::f32:
   1685             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
   1686             break;
   1687           case MVT::f64:
   1688             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
   1689             break;
   1690           }
   1691         }
   1692         break;
   1693       case NVPTXISD::LoadV2:
   1694       case NVPTXISD::LDGV2:
   1695         switch (EltVT.getSimpleVT().SimpleTy) {
   1696         default:
   1697           return false;
   1698         case MVT::i8:
   1699           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
   1700           break;
   1701         case MVT::i16:
   1702           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
   1703           break;
   1704         case MVT::i32:
   1705           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
   1706           break;
   1707         case MVT::i64:
   1708           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
   1709           break;
   1710         case MVT::f32:
   1711           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
   1712           break;
   1713         case MVT::f64:
   1714           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
   1715           break;
   1716         }
   1717         break;
   1718       case NVPTXISD::LDUV2:
   1719         switch (EltVT.getSimpleVT().SimpleTy) {
   1720         default:
   1721           return false;
   1722         case MVT::i8:
   1723           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
   1724           break;
   1725         case MVT::i16:
   1726           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
   1727           break;
   1728         case MVT::i32:
   1729           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
   1730           break;
   1731         case MVT::i64:
   1732           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
   1733           break;
   1734         case MVT::f32:
   1735           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
   1736           break;
   1737         case MVT::f64:
   1738           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
   1739           break;
   1740         }
   1741         break;
   1742       case NVPTXISD::LoadV4:
   1743       case NVPTXISD::LDGV4:
   1744         switch (EltVT.getSimpleVT().SimpleTy) {
   1745         default:
   1746           return false;
   1747         case MVT::i8:
   1748           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
   1749           break;
   1750         case MVT::i16:
   1751           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
   1752           break;
   1753         case MVT::i32:
   1754           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
   1755           break;
   1756         case MVT::f32:
   1757           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
   1758           break;
   1759         }
   1760         break;
   1761       case NVPTXISD::LDUV4:
   1762         switch (EltVT.getSimpleVT().SimpleTy) {
   1763         default:
   1764           return false;
   1765         case MVT::i8:
   1766           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
   1767           break;
   1768         case MVT::i16:
   1769           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
   1770           break;
   1771         case MVT::i32:
   1772           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
   1773           break;
   1774         case MVT::f32:
   1775           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
   1776           break;
   1777         }
   1778         break;
   1779       }
   1780     }
   1781 
   1782     SDValue Ops[] = { Base, Offset, Chain };
   1783 
   1784     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
   1785   } else {
   1786     if (TM.is64Bit()) {
   1787       switch (N->getOpcode()) {
   1788       default:
   1789         return false;
   1790       case ISD::LOAD:
   1791       case ISD::INTRINSIC_W_CHAIN:
   1792         if (IsLDG) {
   1793           switch (EltVT.getSimpleVT().SimpleTy) {
   1794           default:
   1795             return false;
   1796           case MVT::i8:
   1797             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
   1798             break;
   1799           case MVT::i16:
   1800             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
   1801             break;
   1802           case MVT::i32:
   1803             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
   1804             break;
   1805           case MVT::i64:
   1806             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
   1807             break;
   1808           case MVT::f32:
   1809             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
   1810             break;
   1811           case MVT::f64:
   1812             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
   1813             break;
   1814           }
   1815         } else {
   1816           switch (EltVT.getSimpleVT().SimpleTy) {
   1817           default:
   1818             return false;
   1819           case MVT::i8:
   1820             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
   1821             break;
   1822           case MVT::i16:
   1823             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
   1824             break;
   1825           case MVT::i32:
   1826             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
   1827             break;
   1828           case MVT::i64:
   1829             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
   1830             break;
   1831           case MVT::f32:
   1832             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
   1833             break;
   1834           case MVT::f64:
   1835             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
   1836             break;
   1837           }
   1838         }
   1839         break;
   1840       case NVPTXISD::LoadV2:
   1841       case NVPTXISD::LDGV2:
   1842         switch (EltVT.getSimpleVT().SimpleTy) {
   1843         default:
   1844           return false;
   1845         case MVT::i8:
   1846           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
   1847           break;
   1848         case MVT::i16:
   1849           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
   1850           break;
   1851         case MVT::i32:
   1852           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
   1853           break;
   1854         case MVT::i64:
   1855           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
   1856           break;
   1857         case MVT::f32:
   1858           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
   1859           break;
   1860         case MVT::f64:
   1861           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
   1862           break;
   1863         }
   1864         break;
   1865       case NVPTXISD::LDUV2:
   1866         switch (EltVT.getSimpleVT().SimpleTy) {
   1867         default:
   1868           return false;
   1869         case MVT::i8:
   1870           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
   1871           break;
   1872         case MVT::i16:
   1873           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
   1874           break;
   1875         case MVT::i32:
   1876           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
   1877           break;
   1878         case MVT::i64:
   1879           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
   1880           break;
   1881         case MVT::f32:
   1882           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
   1883           break;
   1884         case MVT::f64:
   1885           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
   1886           break;
   1887         }
   1888         break;
   1889       case NVPTXISD::LoadV4:
   1890       case NVPTXISD::LDGV4:
   1891         switch (EltVT.getSimpleVT().SimpleTy) {
   1892         default:
   1893           return false;
   1894         case MVT::i8:
   1895           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
   1896           break;
   1897         case MVT::i16:
   1898           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
   1899           break;
   1900         case MVT::i32:
   1901           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
   1902           break;
   1903         case MVT::f32:
   1904           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
   1905           break;
   1906         }
   1907         break;
   1908       case NVPTXISD::LDUV4:
   1909         switch (EltVT.getSimpleVT().SimpleTy) {
   1910         default:
   1911           return false;
   1912         case MVT::i8:
   1913           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
   1914           break;
   1915         case MVT::i16:
   1916           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
   1917           break;
   1918         case MVT::i32:
   1919           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
   1920           break;
   1921         case MVT::f32:
   1922           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
   1923           break;
   1924         }
   1925         break;
   1926       }
   1927     } else {
   1928       switch (N->getOpcode()) {
   1929       default:
   1930         return false;
   1931       case ISD::LOAD:
   1932       case ISD::INTRINSIC_W_CHAIN:
   1933         if (IsLDG) {
   1934           switch (EltVT.getSimpleVT().SimpleTy) {
   1935           default:
   1936             return false;
   1937           case MVT::i8:
   1938             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
   1939             break;
   1940           case MVT::i16:
   1941             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
   1942             break;
   1943           case MVT::i32:
   1944             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
   1945             break;
   1946           case MVT::i64:
   1947             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
   1948             break;
   1949           case MVT::f32:
   1950             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
   1951             break;
   1952           case MVT::f64:
   1953             Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
   1954             break;
   1955           }
   1956         } else {
   1957           switch (EltVT.getSimpleVT().SimpleTy) {
   1958           default:
   1959             return false;
   1960           case MVT::i8:
   1961             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
   1962             break;
   1963           case MVT::i16:
   1964             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
   1965             break;
   1966           case MVT::i32:
   1967             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
   1968             break;
   1969           case MVT::i64:
   1970             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
   1971             break;
   1972           case MVT::f32:
   1973             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
   1974             break;
   1975           case MVT::f64:
   1976             Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
   1977             break;
   1978           }
   1979         }
   1980         break;
   1981       case NVPTXISD::LoadV2:
   1982       case NVPTXISD::LDGV2:
   1983         switch (EltVT.getSimpleVT().SimpleTy) {
   1984         default:
   1985           return false;
   1986         case MVT::i8:
   1987           Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
   1988           break;
   1989         case MVT::i16:
   1990           Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
   1991           break;
   1992         case MVT::i32:
   1993           Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
   1994           break;
   1995         case MVT::i64:
   1996           Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
   1997           break;
   1998         case MVT::f32:
   1999           Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
   2000           break;
   2001         case MVT::f64:
   2002           Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
   2003           break;
   2004         }
   2005         break;
   2006       case NVPTXISD::LDUV2:
   2007         switch (EltVT.getSimpleVT().SimpleTy) {
   2008         default:
   2009           return false;
   2010         case MVT::i8:
   2011           Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
   2012           break;
   2013         case MVT::i16:
   2014           Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
   2015           break;
   2016         case MVT::i32:
   2017           Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
   2018           break;
   2019         case MVT::i64:
   2020           Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
   2021           break;
   2022         case MVT::f32:
   2023           Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
   2024           break;
   2025         case MVT::f64:
   2026           Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
   2027           break;
   2028         }
   2029         break;
   2030       case NVPTXISD::LoadV4:
   2031       case NVPTXISD::LDGV4:
   2032         switch (EltVT.getSimpleVT().SimpleTy) {
   2033         default:
   2034           return false;
   2035         case MVT::i8:
   2036           Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
   2037           break;
   2038         case MVT::i16:
   2039           Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
   2040           break;
   2041         case MVT::i32:
   2042           Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
   2043           break;
   2044         case MVT::f32:
   2045           Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
   2046           break;
   2047         }
   2048         break;
   2049       case NVPTXISD::LDUV4:
   2050         switch (EltVT.getSimpleVT().SimpleTy) {
   2051         default:
   2052           return false;
   2053         case MVT::i8:
   2054           Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
   2055           break;
   2056         case MVT::i16:
   2057           Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
   2058           break;
   2059         case MVT::i32:
   2060           Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
   2061           break;
   2062         case MVT::f32:
   2063           Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
   2064           break;
   2065         }
   2066         break;
   2067       }
   2068     }
   2069 
   2070     SDValue Ops[] = { Op1, Chain };
   2071     LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
   2072   }
   2073 
   2074   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2075   MemRefs0[0] = Mem->getMemOperand();
   2076   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2077 
   2078   // For automatic generation of LDG (through SelectLoad[Vector], not the
   2079   // intrinsics), we may have an extending load like:
   2080   //
   2081   //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
   2082   //
   2083   // In this case, the matching logic above will select a load for the original
   2084   // memory type (in this case, i8) and our types will not match (the node needs
   2085   // to return an i32 in this case). Our LDG/LDU nodes do not support the
   2086   // concept of sign-/zero-extension, so emulate it here by adding an explicit
   2087   // CVT instruction. Ptxas should clean up any redundancies here.
   2088 
   2089   EVT OrigType = N->getValueType(0);
   2090   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
   2091 
   2092   if (OrigType != EltVT && LdNode) {
   2093     // We have an extending-load. The instruction we selected operates on the
   2094     // smaller type, but the SDNode we are replacing has the larger type. We
   2095     // need to emit a CVT to make the types match.
   2096     bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
   2097     unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
   2098                                        EltVT.getSimpleVT(), IsSigned);
   2099 
   2100     // For each output value, apply the manual sign/zero-extension and make sure
   2101     // all users of the load go through that CVT.
   2102     for (unsigned i = 0; i != NumElts; ++i) {
   2103       SDValue Res(LD, i);
   2104       SDValue OrigVal(N, i);
   2105 
   2106       SDNode *CvtNode =
   2107         CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
   2108                                CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
   2109                                                          DL, MVT::i32));
   2110       ReplaceUses(OrigVal, SDValue(CvtNode, 0));
   2111     }
   2112   }
   2113 
   2114   ReplaceNode(N, LD);
   2115   return true;
   2116 }
   2117 
   2118 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
          // Selects an NVPTX ST_* machine node for the store node N.
          //
          // N is cast to StoreSDNode. Operand 0 is used as the chain, operand 1 as
          // the value operand of the ST_* node and operand 2 as the address.
          //
          // Returns false (N is not replaced) when the store is indexed, when its
          // memory type is not simple, when the memory type is a vector whose
          // element count is neither 2 nor 4, or when the source value type is not
          // one of i8/i16/i32/i64/f32/f64. Otherwise the machine node is created,
          // N's memory operand is attached to it, N is replaced by it and true is
          // returned.
   2119   SDLoc dl(N);
   2120   StoreSDNode *ST = cast<StoreSDNode>(N);
   2121   EVT StoreVT = ST->getMemoryVT();
   2122   SDNode *NVPTXST = nullptr;
   2123 
   2124   // do not support pre/post inc/dec
   2125   if (ST->isIndexed())
   2126     return false;
   2127 
          // Everything below works on MVTs, so extended memory types are rejected.
   2128   if (!StoreVT.isSimple())
   2129     return false;
   2130 
   2131   // Address Space Setting
   2132   unsigned int codeAddrSpace = getCodeAddrSpace(ST);
   2133 
   2134   // Volatile Setting
   2135   // - .volatile is only available for .global and .shared
          //   (the check below additionally keeps the flag for the GENERIC code
          //   address space; for any other space the flag is cleared)
   2136   bool isVolatile = ST->isVolatile();
   2137   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2138       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2139       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2140     isVolatile = false;
   2141 
   2142   // Vector Setting
          // Scalar by default; only 2- and 4-element vector memory types are
          // mapped to V2/V4, any other element count makes selection fail.
   2143   MVT SimpleVT = StoreVT.getSimpleVT();
   2144   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
   2145   if (SimpleVT.isVector()) {
   2146     unsigned num = SimpleVT.getVectorNumElements();
   2147     if (num == 2)
   2148       vecType = NVPTX::PTXLdStInstCode::V2;
   2149     else if (num == 4)
   2150       vecType = NVPTX::PTXLdStInstCode::V4;
   2151     else
   2152       return false;
   2153   }
   2154 
   2155   // Type Setting: toType + toTypeWidth
   2156   // - for integer type, always use 'u'
   2157   //
          // Both values are derived from the scalar (element) type of the memory
          // type, not from the type of the value operand.
   2158   MVT ScalarVT = SimpleVT.getScalarType();
   2159   unsigned toTypeWidth = ScalarVT.getSizeInBits();
   2160   unsigned int toType;
   2161   if (ScalarVT.isFloatingPoint())
   2162     toType = NVPTX::PTXLdStInstCode::Float;
   2163   else
   2164     toType = NVPTX::PTXLdStInstCode::Unsigned;
   2165 
   2166   // Create the machine instruction DAG
   2167   SDValue Chain = N->getOperand(0);
   2168   SDValue N1 = N->getOperand(1);
   2169   SDValue N2 = N->getOperand(2);
   2170   SDValue Addr;
   2171   SDValue Offset, Base;
          // Opcode is assigned by exactly one of the switches below; every switch
          // returns false in its default case, so it is never read uninitialized.
   2172   unsigned Opcode;
          // The opcode is chosen from the type of the value being stored.
          // NOTE(review): this reads the type of result 0 of the node defining N1,
          // not the type of the particular result N1 refers to -- confirm the two
          // always agree for values that reach this point.
   2173   MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
   2174 
          // The address operand N2 is matched against the addressing-mode
          // selectors in a fixed order; the first one that succeeds decides the
          // opcode suffix and the address operands:
          //   SelectDirectAddr          -> ST_*_avar           (Addr)
          //   SelectADDRsi / ADDRsi64   -> ST_*_asi            (Base, Offset)
          //   SelectADDRri / ADDRri64   -> ST_*_ari[_64]       (Base, Offset)
          //   otherwise                 -> ST_*_areg[_64]      (N2 itself)
          // In all four cases the operand list is: value, volatile flag, code
          // address space, vector kind, to-type, to-type width, address
          // operand(s), chain.
   2175   if (SelectDirectAddr(N2, Addr)) {
   2176     switch (SourceVT) {
   2177     case MVT::i8:
   2178       Opcode = NVPTX::ST_i8_avar;
   2179       break;
   2180     case MVT::i16:
   2181       Opcode = NVPTX::ST_i16_avar;
   2182       break;
   2183     case MVT::i32:
   2184       Opcode = NVPTX::ST_i32_avar;
   2185       break;
   2186     case MVT::i64:
   2187       Opcode = NVPTX::ST_i64_avar;
   2188       break;
   2189     case MVT::f32:
   2190       Opcode = NVPTX::ST_f32_avar;
   2191       break;
   2192     case MVT::f64:
   2193       Opcode = NVPTX::ST_f64_avar;
   2194       break;
   2195     default:
   2196       return false;
   2197     }
   2198     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2199                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2200                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
   2201                       Chain };
   2202     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2203   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2204                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
            // The _asi opcodes are shared between 32- and 64-bit targets; only the
            // selector used for matching differs.
   2205     switch (SourceVT) {
   2206     case MVT::i8:
   2207       Opcode = NVPTX::ST_i8_asi;
   2208       break;
   2209     case MVT::i16:
   2210       Opcode = NVPTX::ST_i16_asi;
   2211       break;
   2212     case MVT::i32:
   2213       Opcode = NVPTX::ST_i32_asi;
   2214       break;
   2215     case MVT::i64:
   2216       Opcode = NVPTX::ST_i64_asi;
   2217       break;
   2218     case MVT::f32:
   2219       Opcode = NVPTX::ST_f32_asi;
   2220       break;
   2221     case MVT::f64:
   2222       Opcode = NVPTX::ST_f64_asi;
   2223       break;
   2224     default:
   2225       return false;
   2226     }
   2227     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2228                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2229                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
   2230                       Offset, Chain };
   2231     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2232   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2233                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
            // Unlike _asi, the _ari form has separate opcodes for 64-bit targets.
   2234     if (TM.is64Bit()) {
   2235       switch (SourceVT) {
   2236       case MVT::i8:
   2237         Opcode = NVPTX::ST_i8_ari_64;
   2238         break;
   2239       case MVT::i16:
   2240         Opcode = NVPTX::ST_i16_ari_64;
   2241         break;
   2242       case MVT::i32:
   2243         Opcode = NVPTX::ST_i32_ari_64;
   2244         break;
   2245       case MVT::i64:
   2246         Opcode = NVPTX::ST_i64_ari_64;
   2247         break;
   2248       case MVT::f32:
   2249         Opcode = NVPTX::ST_f32_ari_64;
   2250         break;
   2251       case MVT::f64:
   2252         Opcode = NVPTX::ST_f64_ari_64;
   2253         break;
   2254       default:
   2255         return false;
   2256       }
   2257     } else {
   2258       switch (SourceVT) {
   2259       case MVT::i8:
   2260         Opcode = NVPTX::ST_i8_ari;
   2261         break;
   2262       case MVT::i16:
   2263         Opcode = NVPTX::ST_i16_ari;
   2264         break;
   2265       case MVT::i32:
   2266         Opcode = NVPTX::ST_i32_ari;
   2267         break;
   2268       case MVT::i64:
   2269         Opcode = NVPTX::ST_i64_ari;
   2270         break;
   2271       case MVT::f32:
   2272         Opcode = NVPTX::ST_f32_ari;
   2273         break;
   2274       case MVT::f64:
   2275         Opcode = NVPTX::ST_f64_ari;
   2276         break;
   2277       default:
   2278         return false;
   2279       }
   2280     }
   2281     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2282                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2283                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
   2284                       Offset, Chain };
   2285     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2286   } else {
            // No addressing-mode selector matched: pass N2 through unchanged as the
            // single address operand of the _areg / _areg_64 opcode.
   2287     if (TM.is64Bit()) {
   2288       switch (SourceVT) {
   2289       case MVT::i8:
   2290         Opcode = NVPTX::ST_i8_areg_64;
   2291         break;
   2292       case MVT::i16:
   2293         Opcode = NVPTX::ST_i16_areg_64;
   2294         break;
   2295       case MVT::i32:
   2296         Opcode = NVPTX::ST_i32_areg_64;
   2297         break;
   2298       case MVT::i64:
   2299         Opcode = NVPTX::ST_i64_areg_64;
   2300         break;
   2301       case MVT::f32:
   2302         Opcode = NVPTX::ST_f32_areg_64;
   2303         break;
   2304       case MVT::f64:
   2305         Opcode = NVPTX::ST_f64_areg_64;
   2306         break;
   2307       default:
   2308         return false;
   2309       }
   2310     } else {
   2311       switch (SourceVT) {
   2312       case MVT::i8:
   2313         Opcode = NVPTX::ST_i8_areg;
   2314         break;
   2315       case MVT::i16:
   2316         Opcode = NVPTX::ST_i16_areg;
   2317         break;
   2318       case MVT::i32:
   2319         Opcode = NVPTX::ST_i32_areg;
   2320         break;
   2321       case MVT::i64:
   2322         Opcode = NVPTX::ST_i64_areg;
   2323         break;
   2324       case MVT::f32:
   2325         Opcode = NVPTX::ST_f32_areg;
   2326         break;
   2327       case MVT::f64:
   2328         Opcode = NVPTX::ST_f64_areg;
   2329         break;
   2330       default:
   2331         return false;
   2332       }
   2333     }
   2334     SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
   2335                       getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
   2336                       getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
   2337                       Chain };
   2338     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   2339   }
   2340 
          // NOTE(review): every branch above assigns NVPTXST, so this only fires if
          // getMachineNode returned null.
   2341   if (!NVPTXST)
   2342     return false;
   2343 
          // Carry N's memory operand over to the new machine node before
          // replacing N with it.
   2344   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2345   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2346   cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2347   ReplaceNode(N, NVPTXST);
   2348   return true;
   2349 }
   2350 
   2351 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
   2352   SDValue Chain = N->getOperand(0);
   2353   SDValue Op1 = N->getOperand(1);
   2354   SDValue Addr, Offset, Base;
   2355   unsigned Opcode;
   2356   SDLoc DL(N);
   2357   SDNode *ST;
   2358   EVT EltVT = Op1.getValueType();
   2359   MemSDNode *MemSD = cast<MemSDNode>(N);
   2360   EVT StoreVT = MemSD->getMemoryVT();
   2361 
   2362   // Address Space Setting
   2363   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
   2364 
   2365   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
   2366     report_fatal_error("Cannot store to pointer that points to constant "
   2367                        "memory space");
   2368   }
   2369 
   2370   // Volatile Setting
   2371   // - .volatile is only available for .global and .shared
   2372   bool IsVolatile = MemSD->isVolatile();
   2373   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
   2374       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
   2375       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
   2376     IsVolatile = false;
   2377 
   2378   // Type Setting: toType + toTypeWidth
   2379   // - for integer type, always use 'u'
   2380   assert(StoreVT.isSimple() && "Store value is not simple");
   2381   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
   2382   unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   2383   unsigned ToType;
   2384   if (ScalarVT.isFloatingPoint())
   2385     ToType = NVPTX::PTXLdStInstCode::Float;
   2386   else
   2387     ToType = NVPTX::PTXLdStInstCode::Unsigned;
   2388 
   2389   SmallVector<SDValue, 12> StOps;
   2390   SDValue N2;
   2391   unsigned VecType;
   2392 
   2393   switch (N->getOpcode()) {
   2394   case NVPTXISD::StoreV2:
   2395     VecType = NVPTX::PTXLdStInstCode::V2;
   2396     StOps.push_back(N->getOperand(1));
   2397     StOps.push_back(N->getOperand(2));
   2398     N2 = N->getOperand(3);
   2399     break;
   2400   case NVPTXISD::StoreV4:
   2401     VecType = NVPTX::PTXLdStInstCode::V4;
   2402     StOps.push_back(N->getOperand(1));
   2403     StOps.push_back(N->getOperand(2));
   2404     StOps.push_back(N->getOperand(3));
   2405     StOps.push_back(N->getOperand(4));
   2406     N2 = N->getOperand(5);
   2407     break;
   2408   default:
   2409     return false;
   2410   }
   2411 
   2412   StOps.push_back(getI32Imm(IsVolatile, DL));
   2413   StOps.push_back(getI32Imm(CodeAddrSpace, DL));
   2414   StOps.push_back(getI32Imm(VecType, DL));
   2415   StOps.push_back(getI32Imm(ToType, DL));
   2416   StOps.push_back(getI32Imm(ToTypeWidth, DL));
   2417 
   2418   if (SelectDirectAddr(N2, Addr)) {
   2419     switch (N->getOpcode()) {
   2420     default:
   2421       return false;
   2422     case NVPTXISD::StoreV2:
   2423       switch (EltVT.getSimpleVT().SimpleTy) {
   2424       default:
   2425         return false;
   2426       case MVT::i8:
   2427         Opcode = NVPTX::STV_i8_v2_avar;
   2428         break;
   2429       case MVT::i16:
   2430         Opcode = NVPTX::STV_i16_v2_avar;
   2431         break;
   2432       case MVT::i32:
   2433         Opcode = NVPTX::STV_i32_v2_avar;
   2434         break;
   2435       case MVT::i64:
   2436         Opcode = NVPTX::STV_i64_v2_avar;
   2437         break;
   2438       case MVT::f32:
   2439         Opcode = NVPTX::STV_f32_v2_avar;
   2440         break;
   2441       case MVT::f64:
   2442         Opcode = NVPTX::STV_f64_v2_avar;
   2443         break;
   2444       }
   2445       break;
   2446     case NVPTXISD::StoreV4:
   2447       switch (EltVT.getSimpleVT().SimpleTy) {
   2448       default:
   2449         return false;
   2450       case MVT::i8:
   2451         Opcode = NVPTX::STV_i8_v4_avar;
   2452         break;
   2453       case MVT::i16:
   2454         Opcode = NVPTX::STV_i16_v4_avar;
   2455         break;
   2456       case MVT::i32:
   2457         Opcode = NVPTX::STV_i32_v4_avar;
   2458         break;
   2459       case MVT::f32:
   2460         Opcode = NVPTX::STV_f32_v4_avar;
   2461         break;
   2462       }
   2463       break;
   2464     }
   2465     StOps.push_back(Addr);
   2466   } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
   2467                           : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
   2468     switch (N->getOpcode()) {
   2469     default:
   2470       return false;
   2471     case NVPTXISD::StoreV2:
   2472       switch (EltVT.getSimpleVT().SimpleTy) {
   2473       default:
   2474         return false;
   2475       case MVT::i8:
   2476         Opcode = NVPTX::STV_i8_v2_asi;
   2477         break;
   2478       case MVT::i16:
   2479         Opcode = NVPTX::STV_i16_v2_asi;
   2480         break;
   2481       case MVT::i32:
   2482         Opcode = NVPTX::STV_i32_v2_asi;
   2483         break;
   2484       case MVT::i64:
   2485         Opcode = NVPTX::STV_i64_v2_asi;
   2486         break;
   2487       case MVT::f32:
   2488         Opcode = NVPTX::STV_f32_v2_asi;
   2489         break;
   2490       case MVT::f64:
   2491         Opcode = NVPTX::STV_f64_v2_asi;
   2492         break;
   2493       }
   2494       break;
   2495     case NVPTXISD::StoreV4:
   2496       switch (EltVT.getSimpleVT().SimpleTy) {
   2497       default:
   2498         return false;
   2499       case MVT::i8:
   2500         Opcode = NVPTX::STV_i8_v4_asi;
   2501         break;
   2502       case MVT::i16:
   2503         Opcode = NVPTX::STV_i16_v4_asi;
   2504         break;
   2505       case MVT::i32:
   2506         Opcode = NVPTX::STV_i32_v4_asi;
   2507         break;
   2508       case MVT::f32:
   2509         Opcode = NVPTX::STV_f32_v4_asi;
   2510         break;
   2511       }
   2512       break;
   2513     }
   2514     StOps.push_back(Base);
   2515     StOps.push_back(Offset);
   2516   } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
   2517                           : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
   2518     if (TM.is64Bit()) {
   2519       switch (N->getOpcode()) {
   2520       default:
   2521         return false;
   2522       case NVPTXISD::StoreV2:
   2523         switch (EltVT.getSimpleVT().SimpleTy) {
   2524         default:
   2525           return false;
   2526         case MVT::i8:
   2527           Opcode = NVPTX::STV_i8_v2_ari_64;
   2528           break;
   2529         case MVT::i16:
   2530           Opcode = NVPTX::STV_i16_v2_ari_64;
   2531           break;
   2532         case MVT::i32:
   2533           Opcode = NVPTX::STV_i32_v2_ari_64;
   2534           break;
   2535         case MVT::i64:
   2536           Opcode = NVPTX::STV_i64_v2_ari_64;
   2537           break;
   2538         case MVT::f32:
   2539           Opcode = NVPTX::STV_f32_v2_ari_64;
   2540           break;
   2541         case MVT::f64:
   2542           Opcode = NVPTX::STV_f64_v2_ari_64;
   2543           break;
   2544         }
   2545         break;
   2546       case NVPTXISD::StoreV4:
   2547         switch (EltVT.getSimpleVT().SimpleTy) {
   2548         default:
   2549           return false;
   2550         case MVT::i8:
   2551           Opcode = NVPTX::STV_i8_v4_ari_64;
   2552           break;
   2553         case MVT::i16:
   2554           Opcode = NVPTX::STV_i16_v4_ari_64;
   2555           break;
   2556         case MVT::i32:
   2557           Opcode = NVPTX::STV_i32_v4_ari_64;
   2558           break;
   2559         case MVT::f32:
   2560           Opcode = NVPTX::STV_f32_v4_ari_64;
   2561           break;
   2562         }
   2563         break;
   2564       }
   2565     } else {
   2566       switch (N->getOpcode()) {
   2567       default:
   2568         return false;
   2569       case NVPTXISD::StoreV2:
   2570         switch (EltVT.getSimpleVT().SimpleTy) {
   2571         default:
   2572           return false;
   2573         case MVT::i8:
   2574           Opcode = NVPTX::STV_i8_v2_ari;
   2575           break;
   2576         case MVT::i16:
   2577           Opcode = NVPTX::STV_i16_v2_ari;
   2578           break;
   2579         case MVT::i32:
   2580           Opcode = NVPTX::STV_i32_v2_ari;
   2581           break;
   2582         case MVT::i64:
   2583           Opcode = NVPTX::STV_i64_v2_ari;
   2584           break;
   2585         case MVT::f32:
   2586           Opcode = NVPTX::STV_f32_v2_ari;
   2587           break;
   2588         case MVT::f64:
   2589           Opcode = NVPTX::STV_f64_v2_ari;
   2590           break;
   2591         }
   2592         break;
   2593       case NVPTXISD::StoreV4:
   2594         switch (EltVT.getSimpleVT().SimpleTy) {
   2595         default:
   2596           return false;
   2597         case MVT::i8:
   2598           Opcode = NVPTX::STV_i8_v4_ari;
   2599           break;
   2600         case MVT::i16:
   2601           Opcode = NVPTX::STV_i16_v4_ari;
   2602           break;
   2603         case MVT::i32:
   2604           Opcode = NVPTX::STV_i32_v4_ari;
   2605           break;
   2606         case MVT::f32:
   2607           Opcode = NVPTX::STV_f32_v4_ari;
   2608           break;
   2609         }
   2610         break;
   2611       }
   2612     }
   2613     StOps.push_back(Base);
   2614     StOps.push_back(Offset);
   2615   } else {
   2616     if (TM.is64Bit()) {
   2617       switch (N->getOpcode()) {
   2618       default:
   2619         return false;
   2620       case NVPTXISD::StoreV2:
   2621         switch (EltVT.getSimpleVT().SimpleTy) {
   2622         default:
   2623           return false;
   2624         case MVT::i8:
   2625           Opcode = NVPTX::STV_i8_v2_areg_64;
   2626           break;
   2627         case MVT::i16:
   2628           Opcode = NVPTX::STV_i16_v2_areg_64;
   2629           break;
   2630         case MVT::i32:
   2631           Opcode = NVPTX::STV_i32_v2_areg_64;
   2632           break;
   2633         case MVT::i64:
   2634           Opcode = NVPTX::STV_i64_v2_areg_64;
   2635           break;
   2636         case MVT::f32:
   2637           Opcode = NVPTX::STV_f32_v2_areg_64;
   2638           break;
   2639         case MVT::f64:
   2640           Opcode = NVPTX::STV_f64_v2_areg_64;
   2641           break;
   2642         }
   2643         break;
   2644       case NVPTXISD::StoreV4:
   2645         switch (EltVT.getSimpleVT().SimpleTy) {
   2646         default:
   2647           return false;
   2648         case MVT::i8:
   2649           Opcode = NVPTX::STV_i8_v4_areg_64;
   2650           break;
   2651         case MVT::i16:
   2652           Opcode = NVPTX::STV_i16_v4_areg_64;
   2653           break;
   2654         case MVT::i32:
   2655           Opcode = NVPTX::STV_i32_v4_areg_64;
   2656           break;
   2657         case MVT::f32:
   2658           Opcode = NVPTX::STV_f32_v4_areg_64;
   2659           break;
   2660         }
   2661         break;
   2662       }
   2663     } else {
   2664       switch (N->getOpcode()) {
   2665       default:
   2666         return false;
   2667       case NVPTXISD::StoreV2:
   2668         switch (EltVT.getSimpleVT().SimpleTy) {
   2669         default:
   2670           return false;
   2671         case MVT::i8:
   2672           Opcode = NVPTX::STV_i8_v2_areg;
   2673           break;
   2674         case MVT::i16:
   2675           Opcode = NVPTX::STV_i16_v2_areg;
   2676           break;
   2677         case MVT::i32:
   2678           Opcode = NVPTX::STV_i32_v2_areg;
   2679           break;
   2680         case MVT::i64:
   2681           Opcode = NVPTX::STV_i64_v2_areg;
   2682           break;
   2683         case MVT::f32:
   2684           Opcode = NVPTX::STV_f32_v2_areg;
   2685           break;
   2686         case MVT::f64:
   2687           Opcode = NVPTX::STV_f64_v2_areg;
   2688           break;
   2689         }
   2690         break;
   2691       case NVPTXISD::StoreV4:
   2692         switch (EltVT.getSimpleVT().SimpleTy) {
   2693         default:
   2694           return false;
   2695         case MVT::i8:
   2696           Opcode = NVPTX::STV_i8_v4_areg;
   2697           break;
   2698         case MVT::i16:
   2699           Opcode = NVPTX::STV_i16_v4_areg;
   2700           break;
   2701         case MVT::i32:
   2702           Opcode = NVPTX::STV_i32_v4_areg;
   2703           break;
   2704         case MVT::f32:
   2705           Opcode = NVPTX::STV_f32_v4_areg;
   2706           break;
   2707         }
   2708         break;
   2709       }
   2710     }
   2711     StOps.push_back(N2);
   2712   }
   2713 
   2714   StOps.push_back(Chain);
   2715 
   2716   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
   2717 
   2718   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2719   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2720   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2721 
   2722   ReplaceNode(N, ST);
   2723   return true;
   2724 }
   2725 
   2726 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
   2727   SDValue Chain = Node->getOperand(0);
   2728   SDValue Offset = Node->getOperand(2);
   2729   SDValue Flag = Node->getOperand(3);
   2730   SDLoc DL(Node);
   2731   MemSDNode *Mem = cast<MemSDNode>(Node);
   2732 
   2733   unsigned VecSize;
   2734   switch (Node->getOpcode()) {
   2735   default:
   2736     return false;
   2737   case NVPTXISD::LoadParam:
   2738     VecSize = 1;
   2739     break;
   2740   case NVPTXISD::LoadParamV2:
   2741     VecSize = 2;
   2742     break;
   2743   case NVPTXISD::LoadParamV4:
   2744     VecSize = 4;
   2745     break;
   2746   }
   2747 
   2748   EVT EltVT = Node->getValueType(0);
   2749   EVT MemVT = Mem->getMemoryVT();
   2750 
   2751   unsigned Opc = 0;
   2752 
   2753   switch (VecSize) {
   2754   default:
   2755     return false;
   2756   case 1:
   2757     switch (MemVT.getSimpleVT().SimpleTy) {
   2758     default:
   2759       return false;
   2760     case MVT::i1:
   2761       Opc = NVPTX::LoadParamMemI8;
   2762       break;
   2763     case MVT::i8:
   2764       Opc = NVPTX::LoadParamMemI8;
   2765       break;
   2766     case MVT::i16:
   2767       Opc = NVPTX::LoadParamMemI16;
   2768       break;
   2769     case MVT::i32:
   2770       Opc = NVPTX::LoadParamMemI32;
   2771       break;
   2772     case MVT::i64:
   2773       Opc = NVPTX::LoadParamMemI64;
   2774       break;
   2775     case MVT::f32:
   2776       Opc = NVPTX::LoadParamMemF32;
   2777       break;
   2778     case MVT::f64:
   2779       Opc = NVPTX::LoadParamMemF64;
   2780       break;
   2781     }
   2782     break;
   2783   case 2:
   2784     switch (MemVT.getSimpleVT().SimpleTy) {
   2785     default:
   2786       return false;
   2787     case MVT::i1:
   2788       Opc = NVPTX::LoadParamMemV2I8;
   2789       break;
   2790     case MVT::i8:
   2791       Opc = NVPTX::LoadParamMemV2I8;
   2792       break;
   2793     case MVT::i16:
   2794       Opc = NVPTX::LoadParamMemV2I16;
   2795       break;
   2796     case MVT::i32:
   2797       Opc = NVPTX::LoadParamMemV2I32;
   2798       break;
   2799     case MVT::i64:
   2800       Opc = NVPTX::LoadParamMemV2I64;
   2801       break;
   2802     case MVT::f32:
   2803       Opc = NVPTX::LoadParamMemV2F32;
   2804       break;
   2805     case MVT::f64:
   2806       Opc = NVPTX::LoadParamMemV2F64;
   2807       break;
   2808     }
   2809     break;
   2810   case 4:
   2811     switch (MemVT.getSimpleVT().SimpleTy) {
   2812     default:
   2813       return false;
   2814     case MVT::i1:
   2815       Opc = NVPTX::LoadParamMemV4I8;
   2816       break;
   2817     case MVT::i8:
   2818       Opc = NVPTX::LoadParamMemV4I8;
   2819       break;
   2820     case MVT::i16:
   2821       Opc = NVPTX::LoadParamMemV4I16;
   2822       break;
   2823     case MVT::i32:
   2824       Opc = NVPTX::LoadParamMemV4I32;
   2825       break;
   2826     case MVT::f32:
   2827       Opc = NVPTX::LoadParamMemV4F32;
   2828       break;
   2829     }
   2830     break;
   2831   }
   2832 
   2833   SDVTList VTs;
   2834   if (VecSize == 1) {
   2835     VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
   2836   } else if (VecSize == 2) {
   2837     VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
   2838   } else {
   2839     EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
   2840     VTs = CurDAG->getVTList(EVTs);
   2841   }
   2842 
   2843   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2844 
   2845   SmallVector<SDValue, 2> Ops;
   2846   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   2847   Ops.push_back(Chain);
   2848   Ops.push_back(Flag);
   2849 
   2850   ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
   2851   return true;
   2852 }
   2853 
   2854 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
   2855   SDLoc DL(N);
   2856   SDValue Chain = N->getOperand(0);
   2857   SDValue Offset = N->getOperand(1);
   2858   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2859   MemSDNode *Mem = cast<MemSDNode>(N);
   2860 
   2861   // How many elements do we have?
   2862   unsigned NumElts = 1;
   2863   switch (N->getOpcode()) {
   2864   default:
   2865     return false;
   2866   case NVPTXISD::StoreRetval:
   2867     NumElts = 1;
   2868     break;
   2869   case NVPTXISD::StoreRetvalV2:
   2870     NumElts = 2;
   2871     break;
   2872   case NVPTXISD::StoreRetvalV4:
   2873     NumElts = 4;
   2874     break;
   2875   }
   2876 
   2877   // Build vector of operands
   2878   SmallVector<SDValue, 6> Ops;
   2879   for (unsigned i = 0; i < NumElts; ++i)
   2880     Ops.push_back(N->getOperand(i + 2));
   2881   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   2882   Ops.push_back(Chain);
   2883 
   2884   // Determine target opcode
   2885   // If we have an i1, use an 8-bit store. The lowering code in
   2886   // NVPTXISelLowering will have already emitted an upcast.
   2887   unsigned Opcode = 0;
   2888   switch (NumElts) {
   2889   default:
   2890     return false;
   2891   case 1:
   2892     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2893     default:
   2894       return false;
   2895     case MVT::i1:
   2896       Opcode = NVPTX::StoreRetvalI8;
   2897       break;
   2898     case MVT::i8:
   2899       Opcode = NVPTX::StoreRetvalI8;
   2900       break;
   2901     case MVT::i16:
   2902       Opcode = NVPTX::StoreRetvalI16;
   2903       break;
   2904     case MVT::i32:
   2905       Opcode = NVPTX::StoreRetvalI32;
   2906       break;
   2907     case MVT::i64:
   2908       Opcode = NVPTX::StoreRetvalI64;
   2909       break;
   2910     case MVT::f32:
   2911       Opcode = NVPTX::StoreRetvalF32;
   2912       break;
   2913     case MVT::f64:
   2914       Opcode = NVPTX::StoreRetvalF64;
   2915       break;
   2916     }
   2917     break;
   2918   case 2:
   2919     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2920     default:
   2921       return false;
   2922     case MVT::i1:
   2923       Opcode = NVPTX::StoreRetvalV2I8;
   2924       break;
   2925     case MVT::i8:
   2926       Opcode = NVPTX::StoreRetvalV2I8;
   2927       break;
   2928     case MVT::i16:
   2929       Opcode = NVPTX::StoreRetvalV2I16;
   2930       break;
   2931     case MVT::i32:
   2932       Opcode = NVPTX::StoreRetvalV2I32;
   2933       break;
   2934     case MVT::i64:
   2935       Opcode = NVPTX::StoreRetvalV2I64;
   2936       break;
   2937     case MVT::f32:
   2938       Opcode = NVPTX::StoreRetvalV2F32;
   2939       break;
   2940     case MVT::f64:
   2941       Opcode = NVPTX::StoreRetvalV2F64;
   2942       break;
   2943     }
   2944     break;
   2945   case 4:
   2946     switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   2947     default:
   2948       return false;
   2949     case MVT::i1:
   2950       Opcode = NVPTX::StoreRetvalV4I8;
   2951       break;
   2952     case MVT::i8:
   2953       Opcode = NVPTX::StoreRetvalV4I8;
   2954       break;
   2955     case MVT::i16:
   2956       Opcode = NVPTX::StoreRetvalV4I16;
   2957       break;
   2958     case MVT::i32:
   2959       Opcode = NVPTX::StoreRetvalV4I32;
   2960       break;
   2961     case MVT::f32:
   2962       Opcode = NVPTX::StoreRetvalV4F32;
   2963       break;
   2964     }
   2965     break;
   2966   }
   2967 
   2968   SDNode *Ret =
   2969       CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   2970   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   2971   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   2972   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   2973 
   2974   ReplaceNode(N, Ret);
   2975   return true;
   2976 }
   2977 
   2978 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
   2979   SDLoc DL(N);
   2980   SDValue Chain = N->getOperand(0);
   2981   SDValue Param = N->getOperand(1);
   2982   unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
   2983   SDValue Offset = N->getOperand(2);
   2984   unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
   2985   MemSDNode *Mem = cast<MemSDNode>(N);
   2986   SDValue Flag = N->getOperand(N->getNumOperands() - 1);
   2987 
   2988   // How many elements do we have?
   2989   unsigned NumElts = 1;
   2990   switch (N->getOpcode()) {
   2991   default:
   2992     return false;
   2993   case NVPTXISD::StoreParamU32:
   2994   case NVPTXISD::StoreParamS32:
   2995   case NVPTXISD::StoreParam:
   2996     NumElts = 1;
   2997     break;
   2998   case NVPTXISD::StoreParamV2:
   2999     NumElts = 2;
   3000     break;
   3001   case NVPTXISD::StoreParamV4:
   3002     NumElts = 4;
   3003     break;
   3004   }
   3005 
   3006   // Build vector of operands
   3007   SmallVector<SDValue, 8> Ops;
   3008   for (unsigned i = 0; i < NumElts; ++i)
   3009     Ops.push_back(N->getOperand(i + 3));
   3010   Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
   3011   Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
   3012   Ops.push_back(Chain);
   3013   Ops.push_back(Flag);
   3014 
   3015   // Determine target opcode
   3016   // If we have an i1, use an 8-bit store. The lowering code in
   3017   // NVPTXISelLowering will have already emitted an upcast.
   3018   unsigned Opcode = 0;
   3019   switch (N->getOpcode()) {
   3020   default:
   3021     switch (NumElts) {
   3022     default:
   3023       return false;
   3024     case 1:
   3025       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   3026       default:
   3027         return false;
   3028       case MVT::i1:
   3029         Opcode = NVPTX::StoreParamI8;
   3030         break;
   3031       case MVT::i8:
   3032         Opcode = NVPTX::StoreParamI8;
   3033         break;
   3034       case MVT::i16:
   3035         Opcode = NVPTX::StoreParamI16;
   3036         break;
   3037       case MVT::i32:
   3038         Opcode = NVPTX::StoreParamI32;
   3039         break;
   3040       case MVT::i64:
   3041         Opcode = NVPTX::StoreParamI64;
   3042         break;
   3043       case MVT::f32:
   3044         Opcode = NVPTX::StoreParamF32;
   3045         break;
   3046       case MVT::f64:
   3047         Opcode = NVPTX::StoreParamF64;
   3048         break;
   3049       }
   3050       break;
   3051     case 2:
   3052       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   3053       default:
   3054         return false;
   3055       case MVT::i1:
   3056         Opcode = NVPTX::StoreParamV2I8;
   3057         break;
   3058       case MVT::i8:
   3059         Opcode = NVPTX::StoreParamV2I8;
   3060         break;
   3061       case MVT::i16:
   3062         Opcode = NVPTX::StoreParamV2I16;
   3063         break;
   3064       case MVT::i32:
   3065         Opcode = NVPTX::StoreParamV2I32;
   3066         break;
   3067       case MVT::i64:
   3068         Opcode = NVPTX::StoreParamV2I64;
   3069         break;
   3070       case MVT::f32:
   3071         Opcode = NVPTX::StoreParamV2F32;
   3072         break;
   3073       case MVT::f64:
   3074         Opcode = NVPTX::StoreParamV2F64;
   3075         break;
   3076       }
   3077       break;
   3078     case 4:
   3079       switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
   3080       default:
   3081         return false;
   3082       case MVT::i1:
   3083         Opcode = NVPTX::StoreParamV4I8;
   3084         break;
   3085       case MVT::i8:
   3086         Opcode = NVPTX::StoreParamV4I8;
   3087         break;
   3088       case MVT::i16:
   3089         Opcode = NVPTX::StoreParamV4I16;
   3090         break;
   3091       case MVT::i32:
   3092         Opcode = NVPTX::StoreParamV4I32;
   3093         break;
   3094       case MVT::f32:
   3095         Opcode = NVPTX::StoreParamV4F32;
   3096         break;
   3097       }
   3098       break;
   3099     }
   3100     break;
   3101   // Special case: if we have a sign-extend/zero-extend node, insert the
   3102   // conversion instruction first, and use that as the value operand to
   3103   // the selected StoreParam node.
   3104   case NVPTXISD::StoreParamU32: {
   3105     Opcode = NVPTX::StoreParamI32;
   3106     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
   3107                                                 MVT::i32);
   3108     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
   3109                                          MVT::i32, Ops[0], CvtNone);
   3110     Ops[0] = SDValue(Cvt, 0);
   3111     break;
   3112   }
   3113   case NVPTXISD::StoreParamS32: {
   3114     Opcode = NVPTX::StoreParamI32;
   3115     SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
   3116                                                 MVT::i32);
   3117     SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
   3118                                          MVT::i32, Ops[0], CvtNone);
   3119     Ops[0] = SDValue(Cvt, 0);
   3120     break;
   3121   }
   3122   }
   3123 
   3124   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
   3125   SDNode *Ret =
   3126       CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
   3127   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   3128   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   3129   cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
   3130 
   3131   ReplaceNode(N, Ret);
   3132   return true;
   3133 }
   3134 
   3135 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
   3136   SDValue Chain = N->getOperand(0);
   3137   unsigned Opc = 0;
   3138   SmallVector<SDValue, 8> Ops;
   3139 
   3140   switch (N->getOpcode()) {
   3141   default: return false;
   3142   case NVPTXISD::Tex1DFloatS32:
   3143     Opc = NVPTX::TEX_1D_F32_S32;
   3144     break;
   3145   case NVPTXISD::Tex1DFloatFloat:
   3146     Opc = NVPTX::TEX_1D_F32_F32;
   3147     break;
   3148   case NVPTXISD::Tex1DFloatFloatLevel:
   3149     Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
   3150     break;
   3151   case NVPTXISD::Tex1DFloatFloatGrad:
   3152     Opc = NVPTX::TEX_1D_F32_F32_GRAD;
   3153     break;
   3154   case NVPTXISD::Tex1DS32S32:
   3155     Opc = NVPTX::TEX_1D_S32_S32;
   3156     break;
   3157   case NVPTXISD::Tex1DS32Float:
   3158     Opc = NVPTX::TEX_1D_S32_F32;
   3159     break;
   3160   case NVPTXISD::Tex1DS32FloatLevel:
   3161     Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
   3162     break;
   3163   case NVPTXISD::Tex1DS32FloatGrad:
   3164     Opc = NVPTX::TEX_1D_S32_F32_GRAD;
   3165     break;
   3166   case NVPTXISD::Tex1DU32S32:
   3167     Opc = NVPTX::TEX_1D_U32_S32;
   3168     break;
   3169   case NVPTXISD::Tex1DU32Float:
   3170     Opc = NVPTX::TEX_1D_U32_F32;
   3171     break;
   3172   case NVPTXISD::Tex1DU32FloatLevel:
   3173     Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
   3174     break;
   3175   case NVPTXISD::Tex1DU32FloatGrad:
   3176     Opc = NVPTX::TEX_1D_U32_F32_GRAD;
   3177     break;
   3178   case NVPTXISD::Tex1DArrayFloatS32:
   3179     Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
   3180     break;
   3181   case NVPTXISD::Tex1DArrayFloatFloat:
   3182     Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
   3183     break;
   3184   case NVPTXISD::Tex1DArrayFloatFloatLevel:
   3185     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
   3186     break;
   3187   case NVPTXISD::Tex1DArrayFloatFloatGrad:
   3188     Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
   3189     break;
   3190   case NVPTXISD::Tex1DArrayS32S32:
   3191     Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
   3192     break;
   3193   case NVPTXISD::Tex1DArrayS32Float:
   3194     Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
   3195     break;
   3196   case NVPTXISD::Tex1DArrayS32FloatLevel:
   3197     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
   3198     break;
   3199   case NVPTXISD::Tex1DArrayS32FloatGrad:
   3200     Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
   3201     break;
   3202   case NVPTXISD::Tex1DArrayU32S32:
   3203     Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
   3204     break;
   3205   case NVPTXISD::Tex1DArrayU32Float:
   3206     Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
   3207     break;
   3208   case NVPTXISD::Tex1DArrayU32FloatLevel:
   3209     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
   3210     break;
   3211   case NVPTXISD::Tex1DArrayU32FloatGrad:
   3212     Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
   3213     break;
   3214   case NVPTXISD::Tex2DFloatS32:
   3215     Opc = NVPTX::TEX_2D_F32_S32;
   3216     break;
   3217   case NVPTXISD::Tex2DFloatFloat:
   3218     Opc = NVPTX::TEX_2D_F32_F32;
   3219     break;
   3220   case NVPTXISD::Tex2DFloatFloatLevel:
   3221     Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
   3222     break;
   3223   case NVPTXISD::Tex2DFloatFloatGrad:
   3224     Opc = NVPTX::TEX_2D_F32_F32_GRAD;
   3225     break;
   3226   case NVPTXISD::Tex2DS32S32:
   3227     Opc = NVPTX::TEX_2D_S32_S32;
   3228     break;
   3229   case NVPTXISD::Tex2DS32Float:
   3230     Opc = NVPTX::TEX_2D_S32_F32;
   3231     break;
   3232   case NVPTXISD::Tex2DS32FloatLevel:
   3233     Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
   3234     break;
   3235   case NVPTXISD::Tex2DS32FloatGrad:
   3236     Opc = NVPTX::TEX_2D_S32_F32_GRAD;
   3237     break;
   3238   case NVPTXISD::Tex2DU32S32:
   3239     Opc = NVPTX::TEX_2D_U32_S32;
   3240     break;
   3241   case NVPTXISD::Tex2DU32Float:
   3242     Opc = NVPTX::TEX_2D_U32_F32;
   3243     break;
   3244   case NVPTXISD::Tex2DU32FloatLevel:
   3245     Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
   3246     break;
   3247   case NVPTXISD::Tex2DU32FloatGrad:
   3248     Opc = NVPTX::TEX_2D_U32_F32_GRAD;
   3249     break;
   3250   case NVPTXISD::Tex2DArrayFloatS32:
   3251     Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
   3252     break;
   3253   case NVPTXISD::Tex2DArrayFloatFloat:
   3254     Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
   3255     break;
   3256   case NVPTXISD::Tex2DArrayFloatFloatLevel:
   3257     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
   3258     break;
   3259   case NVPTXISD::Tex2DArrayFloatFloatGrad:
   3260     Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
   3261     break;
   3262   case NVPTXISD::Tex2DArrayS32S32:
   3263     Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
   3264     break;
   3265   case NVPTXISD::Tex2DArrayS32Float:
   3266     Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
   3267     break;
   3268   case NVPTXISD::Tex2DArrayS32FloatLevel:
   3269     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
   3270     break;
   3271   case NVPTXISD::Tex2DArrayS32FloatGrad:
   3272     Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
   3273     break;
   3274   case NVPTXISD::Tex2DArrayU32S32:
   3275     Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
   3276     break;
   3277   case NVPTXISD::Tex2DArrayU32Float:
   3278     Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
   3279     break;
   3280   case NVPTXISD::Tex2DArrayU32FloatLevel:
   3281     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
   3282     break;
   3283   case NVPTXISD::Tex2DArrayU32FloatGrad:
   3284     Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
   3285     break;
   3286   case NVPTXISD::Tex3DFloatS32:
   3287     Opc = NVPTX::TEX_3D_F32_S32;
   3288     break;
   3289   case NVPTXISD::Tex3DFloatFloat:
   3290     Opc = NVPTX::TEX_3D_F32_F32;
   3291     break;
   3292   case NVPTXISD::Tex3DFloatFloatLevel:
   3293     Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
   3294     break;
   3295   case NVPTXISD::Tex3DFloatFloatGrad:
   3296     Opc = NVPTX::TEX_3D_F32_F32_GRAD;
   3297     break;
   3298   case NVPTXISD::Tex3DS32S32:
   3299     Opc = NVPTX::TEX_3D_S32_S32;
   3300     break;
   3301   case NVPTXISD::Tex3DS32Float:
   3302     Opc = NVPTX::TEX_3D_S32_F32;
   3303     break;
   3304   case NVPTXISD::Tex3DS32FloatLevel:
   3305     Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
   3306     break;
   3307   case NVPTXISD::Tex3DS32FloatGrad:
   3308     Opc = NVPTX::TEX_3D_S32_F32_GRAD;
   3309     break;
   3310   case NVPTXISD::Tex3DU32S32:
   3311     Opc = NVPTX::TEX_3D_U32_S32;
   3312     break;
   3313   case NVPTXISD::Tex3DU32Float:
   3314     Opc = NVPTX::TEX_3D_U32_F32;
   3315     break;
   3316   case NVPTXISD::Tex3DU32FloatLevel:
   3317     Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
   3318     break;
   3319   case NVPTXISD::Tex3DU32FloatGrad:
   3320     Opc = NVPTX::TEX_3D_U32_F32_GRAD;
   3321     break;
   3322   case NVPTXISD::TexCubeFloatFloat:
   3323     Opc = NVPTX::TEX_CUBE_F32_F32;
   3324     break;
   3325   case NVPTXISD::TexCubeFloatFloatLevel:
   3326     Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
   3327     break;
   3328   case NVPTXISD::TexCubeS32Float:
   3329     Opc = NVPTX::TEX_CUBE_S32_F32;
   3330     break;
   3331   case NVPTXISD::TexCubeS32FloatLevel:
   3332     Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
   3333     break;
   3334   case NVPTXISD::TexCubeU32Float:
   3335     Opc = NVPTX::TEX_CUBE_U32_F32;
   3336     break;
   3337   case NVPTXISD::TexCubeU32FloatLevel:
   3338     Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
   3339     break;
   3340   case NVPTXISD::TexCubeArrayFloatFloat:
   3341     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
   3342     break;
   3343   case NVPTXISD::TexCubeArrayFloatFloatLevel:
   3344     Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
   3345     break;
   3346   case NVPTXISD::TexCubeArrayS32Float:
   3347     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
   3348     break;
   3349   case NVPTXISD::TexCubeArrayS32FloatLevel:
   3350     Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
   3351     break;
   3352   case NVPTXISD::TexCubeArrayU32Float:
   3353     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
   3354     break;
   3355   case NVPTXISD::TexCubeArrayU32FloatLevel:
   3356     Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
   3357     break;
   3358   case NVPTXISD::Tld4R2DFloatFloat:
   3359     Opc = NVPTX::TLD4_R_2D_F32_F32;
   3360     break;
   3361   case NVPTXISD::Tld4G2DFloatFloat:
   3362     Opc = NVPTX::TLD4_G_2D_F32_F32;
   3363     break;
   3364   case NVPTXISD::Tld4B2DFloatFloat:
   3365     Opc = NVPTX::TLD4_B_2D_F32_F32;
   3366     break;
   3367   case NVPTXISD::Tld4A2DFloatFloat:
   3368     Opc = NVPTX::TLD4_A_2D_F32_F32;
   3369     break;
   3370   case NVPTXISD::Tld4R2DS64Float:
   3371     Opc = NVPTX::TLD4_R_2D_S32_F32;
   3372     break;
   3373   case NVPTXISD::Tld4G2DS64Float:
   3374     Opc = NVPTX::TLD4_G_2D_S32_F32;
   3375     break;
   3376   case NVPTXISD::Tld4B2DS64Float:
   3377     Opc = NVPTX::TLD4_B_2D_S32_F32;
   3378     break;
   3379   case NVPTXISD::Tld4A2DS64Float:
   3380     Opc = NVPTX::TLD4_A_2D_S32_F32;
   3381     break;
   3382   case NVPTXISD::Tld4R2DU64Float:
   3383     Opc = NVPTX::TLD4_R_2D_U32_F32;
   3384     break;
   3385   case NVPTXISD::Tld4G2DU64Float:
   3386     Opc = NVPTX::TLD4_G_2D_U32_F32;
   3387     break;
   3388   case NVPTXISD::Tld4B2DU64Float:
   3389     Opc = NVPTX::TLD4_B_2D_U32_F32;
   3390     break;
   3391   case NVPTXISD::Tld4A2DU64Float:
   3392     Opc = NVPTX::TLD4_A_2D_U32_F32;
   3393     break;
   3394   case NVPTXISD::TexUnified1DFloatS32:
   3395     Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
   3396     break;
   3397   case NVPTXISD::TexUnified1DFloatFloat:
   3398     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
   3399     break;
   3400   case NVPTXISD::TexUnified1DFloatFloatLevel:
   3401     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
   3402     break;
   3403   case NVPTXISD::TexUnified1DFloatFloatGrad:
   3404     Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
   3405     break;
   3406   case NVPTXISD::TexUnified1DS32S32:
   3407     Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
   3408     break;
   3409   case NVPTXISD::TexUnified1DS32Float:
   3410     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
   3411     break;
   3412   case NVPTXISD::TexUnified1DS32FloatLevel:
   3413     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
   3414     break;
   3415   case NVPTXISD::TexUnified1DS32FloatGrad:
   3416     Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
   3417     break;
   3418   case NVPTXISD::TexUnified1DU32S32:
   3419     Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
   3420     break;
   3421   case NVPTXISD::TexUnified1DU32Float:
   3422     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
   3423     break;
   3424   case NVPTXISD::TexUnified1DU32FloatLevel:
   3425     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
   3426     break;
   3427   case NVPTXISD::TexUnified1DU32FloatGrad:
   3428     Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
   3429     break;
   3430   case NVPTXISD::TexUnified1DArrayFloatS32:
   3431     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
   3432     break;
   3433   case NVPTXISD::TexUnified1DArrayFloatFloat:
   3434     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
   3435     break;
   3436   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
   3437     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
   3438     break;
   3439   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
   3440     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
   3441     break;
   3442   case NVPTXISD::TexUnified1DArrayS32S32:
   3443     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
   3444     break;
   3445   case NVPTXISD::TexUnified1DArrayS32Float:
   3446     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
   3447     break;
   3448   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
   3449     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
   3450     break;
   3451   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
   3452     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
   3453     break;
   3454   case NVPTXISD::TexUnified1DArrayU32S32:
   3455     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
   3456     break;
   3457   case NVPTXISD::TexUnified1DArrayU32Float:
   3458     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
   3459     break;
   3460   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
   3461     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
   3462     break;
   3463   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
   3464     Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
   3465     break;
   3466   case NVPTXISD::TexUnified2DFloatS32:
   3467     Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
   3468     break;
   3469   case NVPTXISD::TexUnified2DFloatFloat:
   3470     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
   3471     break;
   3472   case NVPTXISD::TexUnified2DFloatFloatLevel:
   3473     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
   3474     break;
   3475   case NVPTXISD::TexUnified2DFloatFloatGrad:
   3476     Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
   3477     break;
   3478   case NVPTXISD::TexUnified2DS32S32:
   3479     Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
   3480     break;
   3481   case NVPTXISD::TexUnified2DS32Float:
   3482     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
   3483     break;
   3484   case NVPTXISD::TexUnified2DS32FloatLevel:
   3485     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
   3486     break;
   3487   case NVPTXISD::TexUnified2DS32FloatGrad:
   3488     Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
   3489     break;
   3490   case NVPTXISD::TexUnified2DU32S32:
   3491     Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
   3492     break;
   3493   case NVPTXISD::TexUnified2DU32Float:
   3494     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
   3495     break;
   3496   case NVPTXISD::TexUnified2DU32FloatLevel:
   3497     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
   3498     break;
   3499   case NVPTXISD::TexUnified2DU32FloatGrad:
   3500     Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
   3501     break;
   3502   case NVPTXISD::TexUnified2DArrayFloatS32:
   3503     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
   3504     break;
   3505   case NVPTXISD::TexUnified2DArrayFloatFloat:
   3506     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
   3507     break;
   3508   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
   3509     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
   3510     break;
   3511   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
   3512     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
   3513     break;
   3514   case NVPTXISD::TexUnified2DArrayS32S32:
   3515     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
   3516     break;
   3517   case NVPTXISD::TexUnified2DArrayS32Float:
   3518     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
   3519     break;
   3520   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
   3521     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
   3522     break;
   3523   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
   3524     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
   3525     break;
   3526   case NVPTXISD::TexUnified2DArrayU32S32:
   3527     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
   3528     break;
   3529   case NVPTXISD::TexUnified2DArrayU32Float:
   3530     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
   3531     break;
   3532   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
   3533     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
   3534     break;
   3535   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
   3536     Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
   3537     break;
   3538   case NVPTXISD::TexUnified3DFloatS32:
   3539     Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
   3540     break;
   3541   case NVPTXISD::TexUnified3DFloatFloat:
   3542     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
   3543     break;
   3544   case NVPTXISD::TexUnified3DFloatFloatLevel:
   3545     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
   3546     break;
   3547   case NVPTXISD::TexUnified3DFloatFloatGrad:
   3548     Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
   3549     break;
   3550   case NVPTXISD::TexUnified3DS32S32:
   3551     Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
   3552     break;
   3553   case NVPTXISD::TexUnified3DS32Float:
   3554     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
   3555     break;
   3556   case NVPTXISD::TexUnified3DS32FloatLevel:
   3557     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
   3558     break;
   3559   case NVPTXISD::TexUnified3DS32FloatGrad:
   3560     Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
   3561     break;
   3562   case NVPTXISD::TexUnified3DU32S32:
   3563     Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
   3564     break;
   3565   case NVPTXISD::TexUnified3DU32Float:
   3566     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
   3567     break;
   3568   case NVPTXISD::TexUnified3DU32FloatLevel:
   3569     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
   3570     break;
   3571   case NVPTXISD::TexUnified3DU32FloatGrad:
   3572     Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
   3573     break;
   3574   case NVPTXISD::TexUnifiedCubeFloatFloat:
   3575     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
   3576     break;
   3577   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
   3578     Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
   3579     break;
   3580   case NVPTXISD::TexUnifiedCubeS32Float:
   3581     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
   3582     break;
   3583   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
   3584     Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
   3585     break;
   3586   case NVPTXISD::TexUnifiedCubeU32Float:
   3587     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
   3588     break;
   3589   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
   3590     Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
   3591     break;
   3592   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
   3593     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
   3594     break;
   3595   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
   3596     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
   3597     break;
   3598   case NVPTXISD::TexUnifiedCubeArrayS32Float:
   3599     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
   3600     break;
   3601   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
   3602     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
   3603     break;
   3604   case NVPTXISD::TexUnifiedCubeArrayU32Float:
   3605     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
   3606     break;
   3607   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
   3608     Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
   3609     break;
   3610   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
   3611     Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
   3612     break;
   3613   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
   3614     Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
   3615     break;
   3616   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
   3617     Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
   3618     break;
   3619   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
   3620     Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
   3621     break;
   3622   case NVPTXISD::Tld4UnifiedR2DS64Float:
   3623     Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
   3624     break;
   3625   case NVPTXISD::Tld4UnifiedG2DS64Float:
   3626     Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
   3627     break;
   3628   case NVPTXISD::Tld4UnifiedB2DS64Float:
   3629     Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
   3630     break;
   3631   case NVPTXISD::Tld4UnifiedA2DS64Float:
   3632     Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
   3633     break;
   3634   case NVPTXISD::Tld4UnifiedR2DU64Float:
   3635     Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
   3636     break;
   3637   case NVPTXISD::Tld4UnifiedG2DU64Float:
   3638     Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
   3639     break;
   3640   case NVPTXISD::Tld4UnifiedB2DU64Float:
   3641     Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
   3642     break;
   3643   case NVPTXISD::Tld4UnifiedA2DU64Float:
   3644     Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
   3645     break;
   3646   }
   3647 
   3648   // Copy over operands
   3649   for (unsigned i = 1; i < N->getNumOperands(); ++i) {
   3650     Ops.push_back(N->getOperand(i));
   3651   }
   3652 
   3653   Ops.push_back(Chain);
   3654   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
   3655   return true;
   3656 }
   3657 
   3658 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
   3659   SDValue Chain = N->getOperand(0);
   3660   SDValue TexHandle = N->getOperand(1);
   3661   unsigned Opc = 0;
   3662   SmallVector<SDValue, 8> Ops;
   3663   switch (N->getOpcode()) {
   3664   default: return false;
   3665   case NVPTXISD::Suld1DI8Clamp:
   3666     Opc = NVPTX::SULD_1D_I8_CLAMP;
   3667     Ops.push_back(TexHandle);
   3668     Ops.push_back(N->getOperand(2));
   3669     Ops.push_back(Chain);
   3670     break;
   3671   case NVPTXISD::Suld1DI16Clamp:
   3672     Opc = NVPTX::SULD_1D_I16_CLAMP;
   3673     Ops.push_back(TexHandle);
   3674     Ops.push_back(N->getOperand(2));
   3675     Ops.push_back(Chain);
   3676     break;
   3677   case NVPTXISD::Suld1DI32Clamp:
   3678     Opc = NVPTX::SULD_1D_I32_CLAMP;
   3679     Ops.push_back(TexHandle);
   3680     Ops.push_back(N->getOperand(2));
   3681     Ops.push_back(Chain);
   3682     break;
   3683   case NVPTXISD::Suld1DI64Clamp:
   3684     Opc = NVPTX::SULD_1D_I64_CLAMP;
   3685     Ops.push_back(TexHandle);
   3686     Ops.push_back(N->getOperand(2));
   3687     Ops.push_back(Chain);
   3688     break;
   3689   case NVPTXISD::Suld1DV2I8Clamp:
   3690     Opc = NVPTX::SULD_1D_V2I8_CLAMP;
   3691     Ops.push_back(TexHandle);
   3692     Ops.push_back(N->getOperand(2));
   3693     Ops.push_back(Chain);
   3694     break;
   3695   case NVPTXISD::Suld1DV2I16Clamp:
   3696     Opc = NVPTX::SULD_1D_V2I16_CLAMP;
   3697     Ops.push_back(TexHandle);
   3698     Ops.push_back(N->getOperand(2));
   3699     Ops.push_back(Chain);
   3700     break;
   3701   case NVPTXISD::Suld1DV2I32Clamp:
   3702     Opc = NVPTX::SULD_1D_V2I32_CLAMP;
   3703     Ops.push_back(TexHandle);
   3704     Ops.push_back(N->getOperand(2));
   3705     Ops.push_back(Chain);
   3706     break;
   3707   case NVPTXISD::Suld1DV2I64Clamp:
   3708     Opc = NVPTX::SULD_1D_V2I64_CLAMP;
   3709     Ops.push_back(TexHandle);
   3710     Ops.push_back(N->getOperand(2));
   3711     Ops.push_back(Chain);
   3712     break;
   3713   case NVPTXISD::Suld1DV4I8Clamp:
   3714     Opc = NVPTX::SULD_1D_V4I8_CLAMP;
   3715     Ops.push_back(TexHandle);
   3716     Ops.push_back(N->getOperand(2));
   3717     Ops.push_back(Chain);
   3718     break;
   3719   case NVPTXISD::Suld1DV4I16Clamp:
   3720     Opc = NVPTX::SULD_1D_V4I16_CLAMP;
   3721     Ops.push_back(TexHandle);
   3722     Ops.push_back(N->getOperand(2));
   3723     Ops.push_back(Chain);
   3724     break;
   3725   case NVPTXISD::Suld1DV4I32Clamp:
   3726     Opc = NVPTX::SULD_1D_V4I32_CLAMP;
   3727     Ops.push_back(TexHandle);
   3728     Ops.push_back(N->getOperand(2));
   3729     Ops.push_back(Chain);
   3730     break;
   3731   case NVPTXISD::Suld1DArrayI8Clamp:
   3732     Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
   3733     Ops.push_back(TexHandle);
   3734     Ops.push_back(N->getOperand(2));
   3735     Ops.push_back(N->getOperand(3));
   3736     Ops.push_back(Chain);
   3737     break;
   3738   case NVPTXISD::Suld1DArrayI16Clamp:
   3739     Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
   3740     Ops.push_back(TexHandle);
   3741     Ops.push_back(N->getOperand(2));
   3742     Ops.push_back(N->getOperand(3));
   3743     Ops.push_back(Chain);
   3744     break;
   3745   case NVPTXISD::Suld1DArrayI32Clamp:
   3746     Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
   3747     Ops.push_back(TexHandle);
   3748     Ops.push_back(N->getOperand(2));
   3749     Ops.push_back(N->getOperand(3));
   3750     Ops.push_back(Chain);
   3751     break;
   3752   case NVPTXISD::Suld1DArrayI64Clamp:
   3753     Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
   3754     Ops.push_back(TexHandle);
   3755     Ops.push_back(N->getOperand(2));
   3756     Ops.push_back(N->getOperand(3));
   3757     Ops.push_back(Chain);
   3758     break;
   3759   case NVPTXISD::Suld1DArrayV2I8Clamp:
   3760     Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
   3761     Ops.push_back(TexHandle);
   3762     Ops.push_back(N->getOperand(2));
   3763     Ops.push_back(N->getOperand(3));
   3764     Ops.push_back(Chain);
   3765     break;
   3766   case NVPTXISD::Suld1DArrayV2I16Clamp:
   3767     Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
   3768     Ops.push_back(TexHandle);
   3769     Ops.push_back(N->getOperand(2));
   3770     Ops.push_back(N->getOperand(3));
   3771     Ops.push_back(Chain);
   3772     break;
   3773   case NVPTXISD::Suld1DArrayV2I32Clamp:
   3774     Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
   3775     Ops.push_back(TexHandle);
   3776     Ops.push_back(N->getOperand(2));
   3777     Ops.push_back(N->getOperand(3));
   3778     Ops.push_back(Chain);
   3779     break;
   3780   case NVPTXISD::Suld1DArrayV2I64Clamp:
   3781     Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
   3782     Ops.push_back(TexHandle);
   3783     Ops.push_back(N->getOperand(2));
   3784     Ops.push_back(N->getOperand(3));
   3785     Ops.push_back(Chain);
   3786     break;
   3787   case NVPTXISD::Suld1DArrayV4I8Clamp:
   3788     Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
   3789     Ops.push_back(TexHandle);
   3790     Ops.push_back(N->getOperand(2));
   3791     Ops.push_back(N->getOperand(3));
   3792     Ops.push_back(Chain);
   3793     break;
   3794   case NVPTXISD::Suld1DArrayV4I16Clamp:
   3795     Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
   3796     Ops.push_back(TexHandle);
   3797     Ops.push_back(N->getOperand(2));
   3798     Ops.push_back(N->getOperand(3));
   3799     Ops.push_back(Chain);
   3800     break;
   3801   case NVPTXISD::Suld1DArrayV4I32Clamp:
   3802     Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
   3803     Ops.push_back(TexHandle);
   3804     Ops.push_back(N->getOperand(2));
   3805     Ops.push_back(N->getOperand(3));
   3806     Ops.push_back(Chain);
   3807     break;
   3808   case NVPTXISD::Suld2DI8Clamp:
   3809     Opc = NVPTX::SULD_2D_I8_CLAMP;
   3810     Ops.push_back(TexHandle);
   3811     Ops.push_back(N->getOperand(2));
   3812     Ops.push_back(N->getOperand(3));
   3813     Ops.push_back(Chain);
   3814     break;
   3815   case NVPTXISD::Suld2DI16Clamp:
   3816     Opc = NVPTX::SULD_2D_I16_CLAMP;
   3817     Ops.push_back(TexHandle);
   3818     Ops.push_back(N->getOperand(2));
   3819     Ops.push_back(N->getOperand(3));
   3820     Ops.push_back(Chain);
   3821     break;
   3822   case NVPTXISD::Suld2DI32Clamp:
   3823     Opc = NVPTX::SULD_2D_I32_CLAMP;
   3824     Ops.push_back(TexHandle);
   3825     Ops.push_back(N->getOperand(2));
   3826     Ops.push_back(N->getOperand(3));
   3827     Ops.push_back(Chain);
   3828     break;
   3829   case NVPTXISD::Suld2DI64Clamp:
   3830     Opc = NVPTX::SULD_2D_I64_CLAMP;
   3831     Ops.push_back(TexHandle);
   3832     Ops.push_back(N->getOperand(2));
   3833     Ops.push_back(N->getOperand(3));
   3834     Ops.push_back(Chain);
   3835     break;
   3836   case NVPTXISD::Suld2DV2I8Clamp:
   3837     Opc = NVPTX::SULD_2D_V2I8_CLAMP;
   3838     Ops.push_back(TexHandle);
   3839     Ops.push_back(N->getOperand(2));
   3840     Ops.push_back(N->getOperand(3));
   3841     Ops.push_back(Chain);
   3842     break;
   3843   case NVPTXISD::Suld2DV2I16Clamp:
   3844     Opc = NVPTX::SULD_2D_V2I16_CLAMP;
   3845     Ops.push_back(TexHandle);
   3846     Ops.push_back(N->getOperand(2));
   3847     Ops.push_back(N->getOperand(3));
   3848     Ops.push_back(Chain);
   3849     break;
   3850   case NVPTXISD::Suld2DV2I32Clamp:
   3851     Opc = NVPTX::SULD_2D_V2I32_CLAMP;
   3852     Ops.push_back(TexHandle);
   3853     Ops.push_back(N->getOperand(2));
   3854     Ops.push_back(N->getOperand(3));
   3855     Ops.push_back(Chain);
   3856     break;
   3857   case NVPTXISD::Suld2DV2I64Clamp:
   3858     Opc = NVPTX::SULD_2D_V2I64_CLAMP;
   3859     Ops.push_back(TexHandle);
   3860     Ops.push_back(N->getOperand(2));
   3861     Ops.push_back(N->getOperand(3));
   3862     Ops.push_back(Chain);
   3863     break;
   3864   case NVPTXISD::Suld2DV4I8Clamp:
   3865     Opc = NVPTX::SULD_2D_V4I8_CLAMP;
   3866     Ops.push_back(TexHandle);
   3867     Ops.push_back(N->getOperand(2));
   3868     Ops.push_back(N->getOperand(3));
   3869     Ops.push_back(Chain);
   3870     break;
   3871   case NVPTXISD::Suld2DV4I16Clamp:
   3872     Opc = NVPTX::SULD_2D_V4I16_CLAMP;
   3873     Ops.push_back(TexHandle);
   3874     Ops.push_back(N->getOperand(2));
   3875     Ops.push_back(N->getOperand(3));
   3876     Ops.push_back(Chain);
   3877     break;
   3878   case NVPTXISD::Suld2DV4I32Clamp:
   3879     Opc = NVPTX::SULD_2D_V4I32_CLAMP;
   3880     Ops.push_back(TexHandle);
   3881     Ops.push_back(N->getOperand(2));
   3882     Ops.push_back(N->getOperand(3));
   3883     Ops.push_back(Chain);
   3884     break;
   3885   case NVPTXISD::Suld2DArrayI8Clamp:
   3886     Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
   3887     Ops.push_back(TexHandle);
   3888     Ops.push_back(N->getOperand(2));
   3889     Ops.push_back(N->getOperand(3));
   3890     Ops.push_back(N->getOperand(4));
   3891     Ops.push_back(Chain);
   3892     break;
   3893   case NVPTXISD::Suld2DArrayI16Clamp:
   3894     Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
   3895     Ops.push_back(TexHandle);
   3896     Ops.push_back(N->getOperand(2));
   3897     Ops.push_back(N->getOperand(3));
   3898     Ops.push_back(N->getOperand(4));
   3899     Ops.push_back(Chain);
   3900     break;
   3901   case NVPTXISD::Suld2DArrayI32Clamp:
   3902     Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
   3903     Ops.push_back(TexHandle);
   3904     Ops.push_back(N->getOperand(2));
   3905     Ops.push_back(N->getOperand(3));
   3906     Ops.push_back(N->getOperand(4));
   3907     Ops.push_back(Chain);
   3908     break;
   3909   case NVPTXISD::Suld2DArrayI64Clamp:
   3910     Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
   3911     Ops.push_back(TexHandle);
   3912     Ops.push_back(N->getOperand(2));
   3913     Ops.push_back(N->getOperand(3));
   3914     Ops.push_back(N->getOperand(4));
   3915     Ops.push_back(Chain);
   3916     break;
   3917   case NVPTXISD::Suld2DArrayV2I8Clamp:
   3918     Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
   3919     Ops.push_back(TexHandle);
   3920     Ops.push_back(N->getOperand(2));
   3921     Ops.push_back(N->getOperand(3));
   3922     Ops.push_back(N->getOperand(4));
   3923     Ops.push_back(Chain);
   3924     break;
   3925   case NVPTXISD::Suld2DArrayV2I16Clamp:
   3926     Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
   3927     Ops.push_back(TexHandle);
   3928     Ops.push_back(N->getOperand(2));
   3929     Ops.push_back(N->getOperand(3));
   3930     Ops.push_back(N->getOperand(4));
   3931     Ops.push_back(Chain);
   3932     break;
   3933   case NVPTXISD::Suld2DArrayV2I32Clamp:
   3934     Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
   3935     Ops.push_back(TexHandle);
   3936     Ops.push_back(N->getOperand(2));
   3937     Ops.push_back(N->getOperand(3));
   3938     Ops.push_back(N->getOperand(4));
   3939     Ops.push_back(Chain);
   3940     break;
   3941   case NVPTXISD::Suld2DArrayV2I64Clamp:
   3942     Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
   3943     Ops.push_back(TexHandle);
   3944     Ops.push_back(N->getOperand(2));
   3945     Ops.push_back(N->getOperand(3));
   3946     Ops.push_back(N->getOperand(4));
   3947     Ops.push_back(Chain);
   3948     break;
   3949   case NVPTXISD::Suld2DArrayV4I8Clamp:
   3950     Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
   3951     Ops.push_back(TexHandle);
   3952     Ops.push_back(N->getOperand(2));
   3953     Ops.push_back(N->getOperand(3));
   3954     Ops.push_back(N->getOperand(4));
   3955     Ops.push_back(Chain);
   3956     break;
   3957   case NVPTXISD::Suld2DArrayV4I16Clamp:
   3958     Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
   3959     Ops.push_back(TexHandle);
   3960     Ops.push_back(N->getOperand(2));
   3961     Ops.push_back(N->getOperand(3));
   3962     Ops.push_back(N->getOperand(4));
   3963     Ops.push_back(Chain);
   3964     break;
   3965   case NVPTXISD::Suld2DArrayV4I32Clamp:
   3966     Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
   3967     Ops.push_back(TexHandle);
   3968     Ops.push_back(N->getOperand(2));
   3969     Ops.push_back(N->getOperand(3));
   3970     Ops.push_back(N->getOperand(4));
   3971     Ops.push_back(Chain);
   3972     break;
   3973   case NVPTXISD::Suld3DI8Clamp:
   3974     Opc = NVPTX::SULD_3D_I8_CLAMP;
   3975     Ops.push_back(TexHandle);
   3976     Ops.push_back(N->getOperand(2));
   3977     Ops.push_back(N->getOperand(3));
   3978     Ops.push_back(N->getOperand(4));
   3979     Ops.push_back(Chain);
   3980     break;
   3981   case NVPTXISD::Suld3DI16Clamp:
   3982     Opc = NVPTX::SULD_3D_I16_CLAMP;
   3983     Ops.push_back(TexHandle);
   3984     Ops.push_back(N->getOperand(2));
   3985     Ops.push_back(N->getOperand(3));
   3986     Ops.push_back(N->getOperand(4));
   3987     Ops.push_back(Chain);
   3988     break;
   3989   case NVPTXISD::Suld3DI32Clamp:
   3990     Opc = NVPTX::SULD_3D_I32_CLAMP;
   3991     Ops.push_back(TexHandle);
   3992     Ops.push_back(N->getOperand(2));
   3993     Ops.push_back(N->getOperand(3));
   3994     Ops.push_back(N->getOperand(4));
   3995     Ops.push_back(Chain);
   3996     break;
   3997   case NVPTXISD::Suld3DI64Clamp:
   3998     Opc = NVPTX::SULD_3D_I64_CLAMP;
   3999     Ops.push_back(TexHandle);
   4000     Ops.push_back(N->getOperand(2));
   4001     Ops.push_back(N->getOperand(3));
   4002     Ops.push_back(N->getOperand(4));
   4003     Ops.push_back(Chain);
   4004     break;
   4005   case NVPTXISD::Suld3DV2I8Clamp:
   4006     Opc = NVPTX::SULD_3D_V2I8_CLAMP;
   4007     Ops.push_back(TexHandle);
   4008     Ops.push_back(N->getOperand(2));
   4009     Ops.push_back(N->getOperand(3));
   4010     Ops.push_back(N->getOperand(4));
   4011     Ops.push_back(Chain);
   4012     break;
   4013   case NVPTXISD::Suld3DV2I16Clamp:
   4014     Opc = NVPTX::SULD_3D_V2I16_CLAMP;
   4015     Ops.push_back(TexHandle);
   4016     Ops.push_back(N->getOperand(2));
   4017     Ops.push_back(N->getOperand(3));
   4018     Ops.push_back(N->getOperand(4));
   4019     Ops.push_back(Chain);
   4020     break;
   4021   case NVPTXISD::Suld3DV2I32Clamp:
   4022     Opc = NVPTX::SULD_3D_V2I32_CLAMP;
   4023     Ops.push_back(TexHandle);
   4024     Ops.push_back(N->getOperand(2));
   4025     Ops.push_back(N->getOperand(3));
   4026     Ops.push_back(N->getOperand(4));
   4027     Ops.push_back(Chain);
   4028     break;
   4029   case NVPTXISD::Suld3DV2I64Clamp:
   4030     Opc = NVPTX::SULD_3D_V2I64_CLAMP;
   4031     Ops.push_back(TexHandle);
   4032     Ops.push_back(N->getOperand(2));
   4033     Ops.push_back(N->getOperand(3));
   4034     Ops.push_back(N->getOperand(4));
   4035     Ops.push_back(Chain);
   4036     break;
   4037   case NVPTXISD::Suld3DV4I8Clamp:
   4038     Opc = NVPTX::SULD_3D_V4I8_CLAMP;
   4039     Ops.push_back(TexHandle);
   4040     Ops.push_back(N->getOperand(2));
   4041     Ops.push_back(N->getOperand(3));
   4042     Ops.push_back(N->getOperand(4));
   4043     Ops.push_back(Chain);
   4044     break;
   4045   case NVPTXISD::Suld3DV4I16Clamp:
   4046     Opc = NVPTX::SULD_3D_V4I16_CLAMP;
   4047     Ops.push_back(TexHandle);
   4048     Ops.push_back(N->getOperand(2));
   4049     Ops.push_back(N->getOperand(3));
   4050     Ops.push_back(N->getOperand(4));
   4051     Ops.push_back(Chain);
   4052     break;
   4053   case NVPTXISD::Suld3DV4I32Clamp:
   4054     Opc = NVPTX::SULD_3D_V4I32_CLAMP;
   4055     Ops.push_back(TexHandle);
   4056     Ops.push_back(N->getOperand(2));
   4057     Ops.push_back(N->getOperand(3));
   4058     Ops.push_back(N->getOperand(4));
   4059     Ops.push_back(Chain);
   4060     break;
   4061   case NVPTXISD::Suld1DI8Trap:
   4062     Opc = NVPTX::SULD_1D_I8_TRAP;
   4063     Ops.push_back(TexHandle);
   4064     Ops.push_back(N->getOperand(2));
   4065     Ops.push_back(Chain);
   4066     break;
   4067   case NVPTXISD::Suld1DI16Trap:
   4068     Opc = NVPTX::SULD_1D_I16_TRAP;
   4069     Ops.push_back(TexHandle);
   4070     Ops.push_back(N->getOperand(2));
   4071     Ops.push_back(Chain);
   4072     break;
   4073   case NVPTXISD::Suld1DI32Trap:
   4074     Opc = NVPTX::SULD_1D_I32_TRAP;
   4075     Ops.push_back(TexHandle);
   4076     Ops.push_back(N->getOperand(2));
   4077     Ops.push_back(Chain);
   4078     break;
   4079   case NVPTXISD::Suld1DI64Trap:
   4080     Opc = NVPTX::SULD_1D_I64_TRAP;
   4081     Ops.push_back(TexHandle);
   4082     Ops.push_back(N->getOperand(2));
   4083     Ops.push_back(Chain);
   4084     break;
   4085   case NVPTXISD::Suld1DV2I8Trap:
   4086     Opc = NVPTX::SULD_1D_V2I8_TRAP;
   4087     Ops.push_back(TexHandle);
   4088     Ops.push_back(N->getOperand(2));
   4089     Ops.push_back(Chain);
   4090     break;
   4091   case NVPTXISD::Suld1DV2I16Trap:
   4092     Opc = NVPTX::SULD_1D_V2I16_TRAP;
   4093     Ops.push_back(TexHandle);
   4094     Ops.push_back(N->getOperand(2));
   4095     Ops.push_back(Chain);
   4096     break;
   4097   case NVPTXISD::Suld1DV2I32Trap:
   4098     Opc = NVPTX::SULD_1D_V2I32_TRAP;
   4099     Ops.push_back(TexHandle);
   4100     Ops.push_back(N->getOperand(2));
   4101     Ops.push_back(Chain);
   4102     break;
   4103   case NVPTXISD::Suld1DV2I64Trap:
   4104     Opc = NVPTX::SULD_1D_V2I64_TRAP;
   4105     Ops.push_back(TexHandle);
   4106     Ops.push_back(N->getOperand(2));
   4107     Ops.push_back(Chain);
   4108     break;
   4109   case NVPTXISD::Suld1DV4I8Trap:
   4110     Opc = NVPTX::SULD_1D_V4I8_TRAP;
   4111     Ops.push_back(TexHandle);
   4112     Ops.push_back(N->getOperand(2));
   4113     Ops.push_back(Chain);
   4114     break;
   4115   case NVPTXISD::Suld1DV4I16Trap:
   4116     Opc = NVPTX::SULD_1D_V4I16_TRAP;
   4117     Ops.push_back(TexHandle);
   4118     Ops.push_back(N->getOperand(2));
   4119     Ops.push_back(Chain);
   4120     break;
   4121   case NVPTXISD::Suld1DV4I32Trap:
   4122     Opc = NVPTX::SULD_1D_V4I32_TRAP;
   4123     Ops.push_back(TexHandle);
   4124     Ops.push_back(N->getOperand(2));
   4125     Ops.push_back(Chain);
   4126     break;
   4127   case NVPTXISD::Suld1DArrayI8Trap:
   4128     Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
   4129     Ops.push_back(TexHandle);
   4130     Ops.push_back(N->getOperand(2));
   4131     Ops.push_back(N->getOperand(3));
   4132     Ops.push_back(Chain);
   4133     break;
   4134   case NVPTXISD::Suld1DArrayI16Trap:
   4135     Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
   4136     Ops.push_back(TexHandle);
   4137     Ops.push_back(N->getOperand(2));
   4138     Ops.push_back(N->getOperand(3));
   4139     Ops.push_back(Chain);
   4140     break;
   4141   case NVPTXISD::Suld1DArrayI32Trap:
   4142     Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
   4143     Ops.push_back(TexHandle);
   4144     Ops.push_back(N->getOperand(2));
   4145     Ops.push_back(N->getOperand(3));
   4146     Ops.push_back(Chain);
   4147     break;
   4148   case NVPTXISD::Suld1DArrayI64Trap:
   4149     Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
   4150     Ops.push_back(TexHandle);
   4151     Ops.push_back(N->getOperand(2));
   4152     Ops.push_back(N->getOperand(3));
   4153     Ops.push_back(Chain);
   4154     break;
   4155   case NVPTXISD::Suld1DArrayV2I8Trap:
   4156     Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
   4157     Ops.push_back(TexHandle);
   4158     Ops.push_back(N->getOperand(2));
   4159     Ops.push_back(N->getOperand(3));
   4160     Ops.push_back(Chain);
   4161     break;
   4162   case NVPTXISD::Suld1DArrayV2I16Trap:
   4163     Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
   4164     Ops.push_back(TexHandle);
   4165     Ops.push_back(N->getOperand(2));
   4166     Ops.push_back(N->getOperand(3));
   4167     Ops.push_back(Chain);
   4168     break;
   4169   case NVPTXISD::Suld1DArrayV2I32Trap:
   4170     Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
   4171     Ops.push_back(TexHandle);
   4172     Ops.push_back(N->getOperand(2));
   4173     Ops.push_back(N->getOperand(3));
   4174     Ops.push_back(Chain);
   4175     break;
   4176   case NVPTXISD::Suld1DArrayV2I64Trap:
   4177     Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
   4178     Ops.push_back(TexHandle);
   4179     Ops.push_back(N->getOperand(2));
   4180     Ops.push_back(N->getOperand(3));
   4181     Ops.push_back(Chain);
   4182     break;
   4183   case NVPTXISD::Suld1DArrayV4I8Trap:
   4184     Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
   4185     Ops.push_back(TexHandle);
   4186     Ops.push_back(N->getOperand(2));
   4187     Ops.push_back(N->getOperand(3));
   4188     Ops.push_back(Chain);
   4189     break;
   4190   case NVPTXISD::Suld1DArrayV4I16Trap:
   4191     Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
   4192     Ops.push_back(TexHandle);
   4193     Ops.push_back(N->getOperand(2));
   4194     Ops.push_back(N->getOperand(3));
   4195     Ops.push_back(Chain);
   4196     break;
   4197   case NVPTXISD::Suld1DArrayV4I32Trap:
   4198     Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
   4199     Ops.push_back(TexHandle);
   4200     Ops.push_back(N->getOperand(2));
   4201     Ops.push_back(N->getOperand(3));
   4202     Ops.push_back(Chain);
   4203     break;
   4204   case NVPTXISD::Suld2DI8Trap:
   4205     Opc = NVPTX::SULD_2D_I8_TRAP;
   4206     Ops.push_back(TexHandle);
   4207     Ops.push_back(N->getOperand(2));
   4208     Ops.push_back(N->getOperand(3));
   4209     Ops.push_back(Chain);
   4210     break;
   4211   case NVPTXISD::Suld2DI16Trap:
   4212     Opc = NVPTX::SULD_2D_I16_TRAP;
   4213     Ops.push_back(TexHandle);
   4214     Ops.push_back(N->getOperand(2));
   4215     Ops.push_back(N->getOperand(3));
   4216     Ops.push_back(Chain);
   4217     break;
   4218   case NVPTXISD::Suld2DI32Trap:
   4219     Opc = NVPTX::SULD_2D_I32_TRAP;
   4220     Ops.push_back(TexHandle);
   4221     Ops.push_back(N->getOperand(2));
   4222     Ops.push_back(N->getOperand(3));
   4223     Ops.push_back(Chain);
   4224     break;
   4225   case NVPTXISD::Suld2DI64Trap:
   4226     Opc = NVPTX::SULD_2D_I64_TRAP;
   4227     Ops.push_back(TexHandle);
   4228     Ops.push_back(N->getOperand(2));
   4229     Ops.push_back(N->getOperand(3));
   4230     Ops.push_back(Chain);
   4231     break;
   4232   case NVPTXISD::Suld2DV2I8Trap:
   4233     Opc = NVPTX::SULD_2D_V2I8_TRAP;
   4234     Ops.push_back(TexHandle);
   4235     Ops.push_back(N->getOperand(2));
   4236     Ops.push_back(N->getOperand(3));
   4237     Ops.push_back(Chain);
   4238     break;
   4239   case NVPTXISD::Suld2DV2I16Trap:
   4240     Opc = NVPTX::SULD_2D_V2I16_TRAP;
   4241     Ops.push_back(TexHandle);
   4242     Ops.push_back(N->getOperand(2));
   4243     Ops.push_back(N->getOperand(3));
   4244     Ops.push_back(Chain);
   4245     break;
   4246   case NVPTXISD::Suld2DV2I32Trap:
   4247     Opc = NVPTX::SULD_2D_V2I32_TRAP;
   4248     Ops.push_back(TexHandle);
   4249     Ops.push_back(N->getOperand(2));
   4250     Ops.push_back(N->getOperand(3));
   4251     Ops.push_back(Chain);
   4252     break;
   4253   case NVPTXISD::Suld2DV2I64Trap:
   4254     Opc = NVPTX::SULD_2D_V2I64_TRAP;
   4255     Ops.push_back(TexHandle);
   4256     Ops.push_back(N->getOperand(2));
   4257     Ops.push_back(N->getOperand(3));
   4258     Ops.push_back(Chain);
   4259     break;
   4260   case NVPTXISD::Suld2DV4I8Trap:
   4261     Opc = NVPTX::SULD_2D_V4I8_TRAP;
   4262     Ops.push_back(TexHandle);
   4263     Ops.push_back(N->getOperand(2));
   4264     Ops.push_back(N->getOperand(3));
   4265     Ops.push_back(Chain);
   4266     break;
   4267   case NVPTXISD::Suld2DV4I16Trap:
   4268     Opc = NVPTX::SULD_2D_V4I16_TRAP;
   4269     Ops.push_back(TexHandle);
   4270     Ops.push_back(N->getOperand(2));
   4271     Ops.push_back(N->getOperand(3));
   4272     Ops.push_back(Chain);
   4273     break;
   4274   case NVPTXISD::Suld2DV4I32Trap:
   4275     Opc = NVPTX::SULD_2D_V4I32_TRAP;
   4276     Ops.push_back(TexHandle);
   4277     Ops.push_back(N->getOperand(2));
   4278     Ops.push_back(N->getOperand(3));
   4279     Ops.push_back(Chain);
   4280     break;
   4281   case NVPTXISD::Suld2DArrayI8Trap:
   4282     Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
   4283     Ops.push_back(TexHandle);
   4284     Ops.push_back(N->getOperand(2));
   4285     Ops.push_back(N->getOperand(3));
   4286     Ops.push_back(N->getOperand(4));
   4287     Ops.push_back(Chain);
   4288     break;
   4289   case NVPTXISD::Suld2DArrayI16Trap:
   4290     Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
   4291     Ops.push_back(TexHandle);
   4292     Ops.push_back(N->getOperand(2));
   4293     Ops.push_back(N->getOperand(3));
   4294     Ops.push_back(N->getOperand(4));
   4295     Ops.push_back(Chain);
   4296     break;
   4297   case NVPTXISD::Suld2DArrayI32Trap:
   4298     Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
   4299     Ops.push_back(TexHandle);
   4300     Ops.push_back(N->getOperand(2));
   4301     Ops.push_back(N->getOperand(3));
   4302     Ops.push_back(N->getOperand(4));
   4303     Ops.push_back(Chain);
   4304     break;
   4305   case NVPTXISD::Suld2DArrayI64Trap:
   4306     Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
   4307     Ops.push_back(TexHandle);
   4308     Ops.push_back(N->getOperand(2));
   4309     Ops.push_back(N->getOperand(3));
   4310     Ops.push_back(N->getOperand(4));
   4311     Ops.push_back(Chain);
   4312     break;
   4313   case NVPTXISD::Suld2DArrayV2I8Trap:
   4314     Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
   4315     Ops.push_back(TexHandle);
   4316     Ops.push_back(N->getOperand(2));
   4317     Ops.push_back(N->getOperand(3));
   4318     Ops.push_back(N->getOperand(4));
   4319     Ops.push_back(Chain);
   4320     break;
   4321   case NVPTXISD::Suld2DArrayV2I16Trap:
   4322     Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
   4323     Ops.push_back(TexHandle);
   4324     Ops.push_back(N->getOperand(2));
   4325     Ops.push_back(N->getOperand(3));
   4326     Ops.push_back(N->getOperand(4));
   4327     Ops.push_back(Chain);
   4328     break;
   4329   case NVPTXISD::Suld2DArrayV2I32Trap:
   4330     Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
   4331     Ops.push_back(TexHandle);
   4332     Ops.push_back(N->getOperand(2));
   4333     Ops.push_back(N->getOperand(3));
   4334     Ops.push_back(N->getOperand(4));
   4335     Ops.push_back(Chain);
   4336     break;
   4337   case NVPTXISD::Suld2DArrayV2I64Trap:
   4338     Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
   4339     Ops.push_back(TexHandle);
   4340     Ops.push_back(N->getOperand(2));
   4341     Ops.push_back(N->getOperand(3));
   4342     Ops.push_back(N->getOperand(4));
   4343     Ops.push_back(Chain);
   4344     break;
   4345   case NVPTXISD::Suld2DArrayV4I8Trap:
   4346     Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
   4347     Ops.push_back(TexHandle);
   4348     Ops.push_back(N->getOperand(2));
   4349     Ops.push_back(N->getOperand(3));
   4350     Ops.push_back(N->getOperand(4));
   4351     Ops.push_back(Chain);
   4352     break;
   4353   case NVPTXISD::Suld2DArrayV4I16Trap:
   4354     Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
   4355     Ops.push_back(TexHandle);
   4356     Ops.push_back(N->getOperand(2));
   4357     Ops.push_back(N->getOperand(3));
   4358     Ops.push_back(N->getOperand(4));
   4359     Ops.push_back(Chain);
   4360     break;
   4361   case NVPTXISD::Suld2DArrayV4I32Trap:
   4362     Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
   4363     Ops.push_back(TexHandle);
   4364     Ops.push_back(N->getOperand(2));
   4365     Ops.push_back(N->getOperand(3));
   4366     Ops.push_back(N->getOperand(4));
   4367     Ops.push_back(Chain);
   4368     break;
   4369   case NVPTXISD::Suld3DI8Trap:
   4370     Opc = NVPTX::SULD_3D_I8_TRAP;
   4371     Ops.push_back(TexHandle);
   4372     Ops.push_back(N->getOperand(2));
   4373     Ops.push_back(N->getOperand(3));
   4374     Ops.push_back(N->getOperand(4));
   4375     Ops.push_back(Chain);
   4376     break;
   4377   case NVPTXISD::Suld3DI16Trap:
   4378     Opc = NVPTX::SULD_3D_I16_TRAP;
   4379     Ops.push_back(TexHandle);
   4380     Ops.push_back(N->getOperand(2));
   4381     Ops.push_back(N->getOperand(3));
   4382     Ops.push_back(N->getOperand(4));
   4383     Ops.push_back(Chain);
   4384     break;
   4385   case NVPTXISD::Suld3DI32Trap:
   4386     Opc = NVPTX::SULD_3D_I32_TRAP;
   4387     Ops.push_back(TexHandle);
   4388     Ops.push_back(N->getOperand(2));
   4389     Ops.push_back(N->getOperand(3));
   4390     Ops.push_back(N->getOperand(4));
   4391     Ops.push_back(Chain);
   4392     break;
   4393   case NVPTXISD::Suld3DI64Trap:
   4394     Opc = NVPTX::SULD_3D_I64_TRAP;
   4395     Ops.push_back(TexHandle);
   4396     Ops.push_back(N->getOperand(2));
   4397     Ops.push_back(N->getOperand(3));
   4398     Ops.push_back(N->getOperand(4));
   4399     Ops.push_back(Chain);
   4400     break;
   4401   case NVPTXISD::Suld3DV2I8Trap:
   4402     Opc = NVPTX::SULD_3D_V2I8_TRAP;
   4403     Ops.push_back(TexHandle);
   4404     Ops.push_back(N->getOperand(2));
   4405     Ops.push_back(N->getOperand(3));
   4406     Ops.push_back(N->getOperand(4));
   4407     Ops.push_back(Chain);
   4408     break;
   4409   case NVPTXISD::Suld3DV2I16Trap:
   4410     Opc = NVPTX::SULD_3D_V2I16_TRAP;
   4411     Ops.push_back(TexHandle);
   4412     Ops.push_back(N->getOperand(2));
   4413     Ops.push_back(N->getOperand(3));
   4414     Ops.push_back(N->getOperand(4));
   4415     Ops.push_back(Chain);
   4416     break;
   4417   case NVPTXISD::Suld3DV2I32Trap:
   4418     Opc = NVPTX::SULD_3D_V2I32_TRAP;
   4419     Ops.push_back(TexHandle);
   4420     Ops.push_back(N->getOperand(2));
   4421     Ops.push_back(N->getOperand(3));
   4422     Ops.push_back(N->getOperand(4));
   4423     Ops.push_back(Chain);
   4424     break;
   4425   case NVPTXISD::Suld3DV2I64Trap:
   4426     Opc = NVPTX::SULD_3D_V2I64_TRAP;
   4427     Ops.push_back(TexHandle);
   4428     Ops.push_back(N->getOperand(2));
   4429     Ops.push_back(N->getOperand(3));
   4430     Ops.push_back(N->getOperand(4));
   4431     Ops.push_back(Chain);
   4432     break;
   4433   case NVPTXISD::Suld3DV4I8Trap:
   4434     Opc = NVPTX::SULD_3D_V4I8_TRAP;
   4435     Ops.push_back(TexHandle);
   4436     Ops.push_back(N->getOperand(2));
   4437     Ops.push_back(N->getOperand(3));
   4438     Ops.push_back(N->getOperand(4));
   4439     Ops.push_back(Chain);
   4440     break;
   4441   case NVPTXISD::Suld3DV4I16Trap:
   4442     Opc = NVPTX::SULD_3D_V4I16_TRAP;
   4443     Ops.push_back(TexHandle);
   4444     Ops.push_back(N->getOperand(2));
   4445     Ops.push_back(N->getOperand(3));
   4446     Ops.push_back(N->getOperand(4));
   4447     Ops.push_back(Chain);
   4448     break;
   4449   case NVPTXISD::Suld3DV4I32Trap:
   4450     Opc = NVPTX::SULD_3D_V4I32_TRAP;
   4451     Ops.push_back(TexHandle);
   4452     Ops.push_back(N->getOperand(2));
   4453     Ops.push_back(N->getOperand(3));
   4454     Ops.push_back(N->getOperand(4));
   4455     Ops.push_back(Chain);
   4456     break;
   4457   case NVPTXISD::Suld1DI8Zero:
   4458     Opc = NVPTX::SULD_1D_I8_ZERO;
   4459     Ops.push_back(TexHandle);
   4460     Ops.push_back(N->getOperand(2));
   4461     Ops.push_back(Chain);
   4462     break;
   4463   case NVPTXISD::Suld1DI16Zero:
   4464     Opc = NVPTX::SULD_1D_I16_ZERO;
   4465     Ops.push_back(TexHandle);
   4466     Ops.push_back(N->getOperand(2));
   4467     Ops.push_back(Chain);
   4468     break;
   4469   case NVPTXISD::Suld1DI32Zero:
   4470     Opc = NVPTX::SULD_1D_I32_ZERO;
   4471     Ops.push_back(TexHandle);
   4472     Ops.push_back(N->getOperand(2));
   4473     Ops.push_back(Chain);
   4474     break;
   4475   case NVPTXISD::Suld1DI64Zero:
   4476     Opc = NVPTX::SULD_1D_I64_ZERO;
   4477     Ops.push_back(TexHandle);
   4478     Ops.push_back(N->getOperand(2));
   4479     Ops.push_back(Chain);
   4480     break;
   4481   case NVPTXISD::Suld1DV2I8Zero:
   4482     Opc = NVPTX::SULD_1D_V2I8_ZERO;
   4483     Ops.push_back(TexHandle);
   4484     Ops.push_back(N->getOperand(2));
   4485     Ops.push_back(Chain);
   4486     break;
   4487   case NVPTXISD::Suld1DV2I16Zero:
   4488     Opc = NVPTX::SULD_1D_V2I16_ZERO;
   4489     Ops.push_back(TexHandle);
   4490     Ops.push_back(N->getOperand(2));
   4491     Ops.push_back(Chain);
   4492     break;
   4493   case NVPTXISD::Suld1DV2I32Zero:
   4494     Opc = NVPTX::SULD_1D_V2I32_ZERO;
   4495     Ops.push_back(TexHandle);
   4496     Ops.push_back(N->getOperand(2));
   4497     Ops.push_back(Chain);
   4498     break;
   4499   case NVPTXISD::Suld1DV2I64Zero:
   4500     Opc = NVPTX::SULD_1D_V2I64_ZERO;
   4501     Ops.push_back(TexHandle);
   4502     Ops.push_back(N->getOperand(2));
   4503     Ops.push_back(Chain);
   4504     break;
   4505   case NVPTXISD::Suld1DV4I8Zero:
   4506     Opc = NVPTX::SULD_1D_V4I8_ZERO;
   4507     Ops.push_back(TexHandle);
   4508     Ops.push_back(N->getOperand(2));
   4509     Ops.push_back(Chain);
   4510     break;
   4511   case NVPTXISD::Suld1DV4I16Zero:
   4512     Opc = NVPTX::SULD_1D_V4I16_ZERO;
   4513     Ops.push_back(TexHandle);
   4514     Ops.push_back(N->getOperand(2));
   4515     Ops.push_back(Chain);
   4516     break;
   4517   case NVPTXISD::Suld1DV4I32Zero:
   4518     Opc = NVPTX::SULD_1D_V4I32_ZERO;
   4519     Ops.push_back(TexHandle);
   4520     Ops.push_back(N->getOperand(2));
   4521     Ops.push_back(Chain);
   4522     break;
   4523   case NVPTXISD::Suld1DArrayI8Zero:
   4524     Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
   4525     Ops.push_back(TexHandle);
   4526     Ops.push_back(N->getOperand(2));
   4527     Ops.push_back(N->getOperand(3));
   4528     Ops.push_back(Chain);
   4529     break;
   4530   case NVPTXISD::Suld1DArrayI16Zero:
   4531     Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
   4532     Ops.push_back(TexHandle);
   4533     Ops.push_back(N->getOperand(2));
   4534     Ops.push_back(N->getOperand(3));
   4535     Ops.push_back(Chain);
   4536     break;
   4537   case NVPTXISD::Suld1DArrayI32Zero:
   4538     Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
   4539     Ops.push_back(TexHandle);
   4540     Ops.push_back(N->getOperand(2));
   4541     Ops.push_back(N->getOperand(3));
   4542     Ops.push_back(Chain);
   4543     break;
   4544   case NVPTXISD::Suld1DArrayI64Zero:
   4545     Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
   4546     Ops.push_back(TexHandle);
   4547     Ops.push_back(N->getOperand(2));
   4548     Ops.push_back(N->getOperand(3));
   4549     Ops.push_back(Chain);
   4550     break;
   4551   case NVPTXISD::Suld1DArrayV2I8Zero:
   4552     Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
   4553     Ops.push_back(TexHandle);
   4554     Ops.push_back(N->getOperand(2));
   4555     Ops.push_back(N->getOperand(3));
   4556     Ops.push_back(Chain);
   4557     break;
   4558   case NVPTXISD::Suld1DArrayV2I16Zero:
   4559     Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
   4560     Ops.push_back(TexHandle);
   4561     Ops.push_back(N->getOperand(2));
   4562     Ops.push_back(N->getOperand(3));
   4563     Ops.push_back(Chain);
   4564     break;
   4565   case NVPTXISD::Suld1DArrayV2I32Zero:
   4566     Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
   4567     Ops.push_back(TexHandle);
   4568     Ops.push_back(N->getOperand(2));
   4569     Ops.push_back(N->getOperand(3));
   4570     Ops.push_back(Chain);
   4571     break;
   4572   case NVPTXISD::Suld1DArrayV2I64Zero:
   4573     Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
   4574     Ops.push_back(TexHandle);
   4575     Ops.push_back(N->getOperand(2));
   4576     Ops.push_back(N->getOperand(3));
   4577     Ops.push_back(Chain);
   4578     break;
   4579   case NVPTXISD::Suld1DArrayV4I8Zero:
   4580     Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
   4581     Ops.push_back(TexHandle);
   4582     Ops.push_back(N->getOperand(2));
   4583     Ops.push_back(N->getOperand(3));
   4584     Ops.push_back(Chain);
   4585     break;
   4586   case NVPTXISD::Suld1DArrayV4I16Zero:
   4587     Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
   4588     Ops.push_back(TexHandle);
   4589     Ops.push_back(N->getOperand(2));
   4590     Ops.push_back(N->getOperand(3));
   4591     Ops.push_back(Chain);
   4592     break;
   4593   case NVPTXISD::Suld1DArrayV4I32Zero:
   4594     Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
   4595     Ops.push_back(TexHandle);
   4596     Ops.push_back(N->getOperand(2));
   4597     Ops.push_back(N->getOperand(3));
   4598     Ops.push_back(Chain);
   4599     break;
   4600   case NVPTXISD::Suld2DI8Zero:
   4601     Opc = NVPTX::SULD_2D_I8_ZERO;
   4602     Ops.push_back(TexHandle);
   4603     Ops.push_back(N->getOperand(2));
   4604     Ops.push_back(N->getOperand(3));
   4605     Ops.push_back(Chain);
   4606     break;
   4607   case NVPTXISD::Suld2DI16Zero:
   4608     Opc = NVPTX::SULD_2D_I16_ZERO;
   4609     Ops.push_back(TexHandle);
   4610     Ops.push_back(N->getOperand(2));
   4611     Ops.push_back(N->getOperand(3));
   4612     Ops.push_back(Chain);
   4613     break;
   4614   case NVPTXISD::Suld2DI32Zero:
   4615     Opc = NVPTX::SULD_2D_I32_ZERO;
   4616     Ops.push_back(TexHandle);
   4617     Ops.push_back(N->getOperand(2));
   4618     Ops.push_back(N->getOperand(3));
   4619     Ops.push_back(Chain);
   4620     break;
   4621   case NVPTXISD::Suld2DI64Zero:
   4622     Opc = NVPTX::SULD_2D_I64_ZERO;
   4623     Ops.push_back(TexHandle);
   4624     Ops.push_back(N->getOperand(2));
   4625     Ops.push_back(N->getOperand(3));
   4626     Ops.push_back(Chain);
   4627     break;
   4628   case NVPTXISD::Suld2DV2I8Zero:
   4629     Opc = NVPTX::SULD_2D_V2I8_ZERO;
   4630     Ops.push_back(TexHandle);
   4631     Ops.push_back(N->getOperand(2));
   4632     Ops.push_back(N->getOperand(3));
   4633     Ops.push_back(Chain);
   4634     break;
   4635   case NVPTXISD::Suld2DV2I16Zero:
   4636     Opc = NVPTX::SULD_2D_V2I16_ZERO;
   4637     Ops.push_back(TexHandle);
   4638     Ops.push_back(N->getOperand(2));
   4639     Ops.push_back(N->getOperand(3));
   4640     Ops.push_back(Chain);
   4641     break;
   4642   case NVPTXISD::Suld2DV2I32Zero:
   4643     Opc = NVPTX::SULD_2D_V2I32_ZERO;
   4644     Ops.push_back(TexHandle);
   4645     Ops.push_back(N->getOperand(2));
   4646     Ops.push_back(N->getOperand(3));
   4647     Ops.push_back(Chain);
   4648     break;
   4649   case NVPTXISD::Suld2DV2I64Zero:
   4650     Opc = NVPTX::SULD_2D_V2I64_ZERO;
   4651     Ops.push_back(TexHandle);
   4652     Ops.push_back(N->getOperand(2));
   4653     Ops.push_back(N->getOperand(3));
   4654     Ops.push_back(Chain);
   4655     break;
   4656   case NVPTXISD::Suld2DV4I8Zero:
   4657     Opc = NVPTX::SULD_2D_V4I8_ZERO;
   4658     Ops.push_back(TexHandle);
   4659     Ops.push_back(N->getOperand(2));
   4660     Ops.push_back(N->getOperand(3));
   4661     Ops.push_back(Chain);
   4662     break;
   4663   case NVPTXISD::Suld2DV4I16Zero:
   4664     Opc = NVPTX::SULD_2D_V4I16_ZERO;
   4665     Ops.push_back(TexHandle);
   4666     Ops.push_back(N->getOperand(2));
   4667     Ops.push_back(N->getOperand(3));
   4668     Ops.push_back(Chain);
   4669     break;
   4670   case NVPTXISD::Suld2DV4I32Zero:
   4671     Opc = NVPTX::SULD_2D_V4I32_ZERO;
   4672     Ops.push_back(TexHandle);
   4673     Ops.push_back(N->getOperand(2));
   4674     Ops.push_back(N->getOperand(3));
   4675     Ops.push_back(Chain);
   4676     break;
   4677   case NVPTXISD::Suld2DArrayI8Zero:
   4678     Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
   4679     Ops.push_back(TexHandle);
   4680     Ops.push_back(N->getOperand(2));
   4681     Ops.push_back(N->getOperand(3));
   4682     Ops.push_back(N->getOperand(4));
   4683     Ops.push_back(Chain);
   4684     break;
   4685   case NVPTXISD::Suld2DArrayI16Zero:
   4686     Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
   4687     Ops.push_back(TexHandle);
   4688     Ops.push_back(N->getOperand(2));
   4689     Ops.push_back(N->getOperand(3));
   4690     Ops.push_back(N->getOperand(4));
   4691     Ops.push_back(Chain);
   4692     break;
   4693   case NVPTXISD::Suld2DArrayI32Zero:
   4694     Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
   4695     Ops.push_back(TexHandle);
   4696     Ops.push_back(N->getOperand(2));
   4697     Ops.push_back(N->getOperand(3));
   4698     Ops.push_back(N->getOperand(4));
   4699     Ops.push_back(Chain);
   4700     break;
   4701   case NVPTXISD::Suld2DArrayI64Zero:
   4702     Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
   4703     Ops.push_back(TexHandle);
   4704     Ops.push_back(N->getOperand(2));
   4705     Ops.push_back(N->getOperand(3));
   4706     Ops.push_back(N->getOperand(4));
   4707     Ops.push_back(Chain);
   4708     break;
   4709   case NVPTXISD::Suld2DArrayV2I8Zero:
   4710     Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
   4711     Ops.push_back(TexHandle);
   4712     Ops.push_back(N->getOperand(2));
   4713     Ops.push_back(N->getOperand(3));
   4714     Ops.push_back(N->getOperand(4));
   4715     Ops.push_back(Chain);
   4716     break;
   4717   case NVPTXISD::Suld2DArrayV2I16Zero:
   4718     Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
   4719     Ops.push_back(TexHandle);
   4720     Ops.push_back(N->getOperand(2));
   4721     Ops.push_back(N->getOperand(3));
   4722     Ops.push_back(N->getOperand(4));
   4723     Ops.push_back(Chain);
   4724     break;
   4725   case NVPTXISD::Suld2DArrayV2I32Zero:
   4726     Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
   4727     Ops.push_back(TexHandle);
   4728     Ops.push_back(N->getOperand(2));
   4729     Ops.push_back(N->getOperand(3));
   4730     Ops.push_back(N->getOperand(4));
   4731     Ops.push_back(Chain);
   4732     break;
   4733   case NVPTXISD::Suld2DArrayV2I64Zero:
   4734     Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
   4735     Ops.push_back(TexHandle);
   4736     Ops.push_back(N->getOperand(2));
   4737     Ops.push_back(N->getOperand(3));
   4738     Ops.push_back(N->getOperand(4));
   4739     Ops.push_back(Chain);
   4740     break;
   4741   case NVPTXISD::Suld2DArrayV4I8Zero:
   4742     Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
   4743     Ops.push_back(TexHandle);
   4744     Ops.push_back(N->getOperand(2));
   4745     Ops.push_back(N->getOperand(3));
   4746     Ops.push_back(N->getOperand(4));
   4747     Ops.push_back(Chain);
   4748     break;
   4749   case NVPTXISD::Suld2DArrayV4I16Zero:
   4750     Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
   4751     Ops.push_back(TexHandle);
   4752     Ops.push_back(N->getOperand(2));
   4753     Ops.push_back(N->getOperand(3));
   4754     Ops.push_back(N->getOperand(4));
   4755     Ops.push_back(Chain);
   4756     break;
   4757   case NVPTXISD::Suld2DArrayV4I32Zero:
   4758     Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
   4759     Ops.push_back(TexHandle);
   4760     Ops.push_back(N->getOperand(2));
   4761     Ops.push_back(N->getOperand(3));
   4762     Ops.push_back(N->getOperand(4));
   4763     Ops.push_back(Chain);
   4764     break;
   4765   case NVPTXISD::Suld3DI8Zero:
   4766     Opc = NVPTX::SULD_3D_I8_ZERO;
   4767     Ops.push_back(TexHandle);
   4768     Ops.push_back(N->getOperand(2));
   4769     Ops.push_back(N->getOperand(3));
   4770     Ops.push_back(N->getOperand(4));
   4771     Ops.push_back(Chain);
   4772     break;
   4773   case NVPTXISD::Suld3DI16Zero:
   4774     Opc = NVPTX::SULD_3D_I16_ZERO;
   4775     Ops.push_back(TexHandle);
   4776     Ops.push_back(N->getOperand(2));
   4777     Ops.push_back(N->getOperand(3));
   4778     Ops.push_back(N->getOperand(4));
   4779     Ops.push_back(Chain);
   4780     break;
   4781   case NVPTXISD::Suld3DI32Zero:
   4782     Opc = NVPTX::SULD_3D_I32_ZERO;
   4783     Ops.push_back(TexHandle);
   4784     Ops.push_back(N->getOperand(2));
   4785     Ops.push_back(N->getOperand(3));
   4786     Ops.push_back(N->getOperand(4));
   4787     Ops.push_back(Chain);
   4788     break;
   4789   case NVPTXISD::Suld3DI64Zero:
   4790     Opc = NVPTX::SULD_3D_I64_ZERO;
   4791     Ops.push_back(TexHandle);
   4792     Ops.push_back(N->getOperand(2));
   4793     Ops.push_back(N->getOperand(3));
   4794     Ops.push_back(N->getOperand(4));
   4795     Ops.push_back(Chain);
   4796     break;
   4797   case NVPTXISD::Suld3DV2I8Zero:
   4798     Opc = NVPTX::SULD_3D_V2I8_ZERO;
   4799     Ops.push_back(TexHandle);
   4800     Ops.push_back(N->getOperand(2));
   4801     Ops.push_back(N->getOperand(3));
   4802     Ops.push_back(N->getOperand(4));
   4803     Ops.push_back(Chain);
   4804     break;
   4805   case NVPTXISD::Suld3DV2I16Zero:
   4806     Opc = NVPTX::SULD_3D_V2I16_ZERO;
   4807     Ops.push_back(TexHandle);
   4808     Ops.push_back(N->getOperand(2));
   4809     Ops.push_back(N->getOperand(3));
   4810     Ops.push_back(N->getOperand(4));
   4811     Ops.push_back(Chain);
   4812     break;
   4813   case NVPTXISD::Suld3DV2I32Zero:
   4814     Opc = NVPTX::SULD_3D_V2I32_ZERO;
   4815     Ops.push_back(TexHandle);
   4816     Ops.push_back(N->getOperand(2));
   4817     Ops.push_back(N->getOperand(3));
   4818     Ops.push_back(N->getOperand(4));
   4819     Ops.push_back(Chain);
   4820     break;
   4821   case NVPTXISD::Suld3DV2I64Zero:
   4822     Opc = NVPTX::SULD_3D_V2I64_ZERO;
   4823     Ops.push_back(TexHandle);
   4824     Ops.push_back(N->getOperand(2));
   4825     Ops.push_back(N->getOperand(3));
   4826     Ops.push_back(N->getOperand(4));
   4827     Ops.push_back(Chain);
   4828     break;
   4829   case NVPTXISD::Suld3DV4I8Zero:
   4830     Opc = NVPTX::SULD_3D_V4I8_ZERO;
   4831     Ops.push_back(TexHandle);
   4832     Ops.push_back(N->getOperand(2));
   4833     Ops.push_back(N->getOperand(3));
   4834     Ops.push_back(N->getOperand(4));
   4835     Ops.push_back(Chain);
   4836     break;
   4837   case NVPTXISD::Suld3DV4I16Zero:
   4838     Opc = NVPTX::SULD_3D_V4I16_ZERO;
   4839     Ops.push_back(TexHandle);
   4840     Ops.push_back(N->getOperand(2));
   4841     Ops.push_back(N->getOperand(3));
   4842     Ops.push_back(N->getOperand(4));
   4843     Ops.push_back(Chain);
   4844     break;
   4845   case NVPTXISD::Suld3DV4I32Zero:
   4846     Opc = NVPTX::SULD_3D_V4I32_ZERO;
   4847     Ops.push_back(TexHandle);
   4848     Ops.push_back(N->getOperand(2));
   4849     Ops.push_back(N->getOperand(3));
   4850     Ops.push_back(N->getOperand(4));
   4851     Ops.push_back(Chain);
   4852     break;
   4853   }
   4854   ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
   4855   return true;
   4856 }
   4857 
   4858 
   4859 /// SelectBFE - Look for instruction sequences that can be made more efficient
   4860 /// by using the 'bfe' (bit-field extract) PTX instruction
   4861 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
   4862   SDLoc DL(N);
   4863   SDValue LHS = N->getOperand(0);
   4864   SDValue RHS = N->getOperand(1);
   4865   SDValue Len;
   4866   SDValue Start;
   4867   SDValue Val;
   4868   bool IsSigned = false;
   4869 
   4870   if (N->getOpcode() == ISD::AND) {
   4871     // Canonicalize the operands
   4872     // We want 'and %val, %mask'
   4873     if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
   4874       std::swap(LHS, RHS);
   4875     }
   4876 
   4877     ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
   4878     if (!Mask) {
   4879       // We need a constant mask on the RHS of the AND
   4880       return false;
   4881     }
   4882 
   4883     // Extract the mask bits
   4884     uint64_t MaskVal = Mask->getZExtValue();
   4885     if (!isMask_64(MaskVal)) {
   4886       // We *could* handle shifted masks here, but doing so would require an
   4887       // 'and' operation to fix up the low-order bits so we would trade
   4888       // shr+and for bfe+and, which has the same throughput
   4889       return false;
   4890     }
   4891 
   4892     // How many bits are in our mask?
   4893     uint64_t NumBits = countTrailingOnes(MaskVal);
   4894     Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
   4895 
   4896     if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
   4897       // We have a 'srl/and' pair, extract the effective start bit and length
   4898       Val = LHS.getNode()->getOperand(0);
   4899       Start = LHS.getNode()->getOperand(1);
   4900       ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
   4901       if (StartConst) {
   4902         uint64_t StartVal = StartConst->getZExtValue();
   4903         // How many "good" bits do we have left?  "good" is defined here as bits
   4904         // that exist in the original value, not shifted in.
   4905         uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
   4906         if (NumBits > GoodBits) {
   4907           // Do not handle the case where bits have been shifted in. In theory
   4908           // we could handle this, but the cost is likely higher than just
   4909           // emitting the srl/and pair.
   4910           return false;
   4911         }
   4912         Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
   4913       } else {
   4914         // Do not handle the case where the shift amount (can be zero if no srl
   4915         // was found) is not constant. We could handle this case, but it would
   4916         // require run-time logic that would be more expensive than just
   4917         // emitting the srl/and pair.
   4918         return false;
   4919       }
   4920     } else {
   4921       // Do not handle the case where the LHS of the and is not a shift. While
   4922       // it would be trivial to handle this case, it would just transform
   4923       // 'and' -> 'bfe', but 'and' has higher-throughput.
   4924       return false;
   4925     }
   4926   } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
   4927     if (LHS->getOpcode() == ISD::AND) {
   4928       ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
   4929       if (!ShiftCnst) {
   4930         // Shift amount must be constant
   4931         return false;
   4932       }
   4933 
   4934       uint64_t ShiftAmt = ShiftCnst->getZExtValue();
   4935 
   4936       SDValue AndLHS = LHS->getOperand(0);
   4937       SDValue AndRHS = LHS->getOperand(1);
   4938 
   4939       // Canonicalize the AND to have the mask on the RHS
   4940       if (isa<ConstantSDNode>(AndLHS)) {
   4941         std::swap(AndLHS, AndRHS);
   4942       }
   4943 
   4944       ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
   4945       if (!MaskCnst) {
   4946         // Mask must be constant
   4947         return false;
   4948       }
   4949 
   4950       uint64_t MaskVal = MaskCnst->getZExtValue();
   4951       uint64_t NumZeros;
   4952       uint64_t NumBits;
   4953       if (isMask_64(MaskVal)) {
   4954         NumZeros = 0;
   4955         // The number of bits in the result bitfield will be the number of
   4956         // trailing ones (the AND) minus the number of bits we shift off
   4957         NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
   4958       } else if (isShiftedMask_64(MaskVal)) {
   4959         NumZeros = countTrailingZeros(MaskVal);
   4960         unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
   4961         // The number of bits in the result bitfield will be the number of
   4962         // trailing zeros plus the number of set bits in the mask minus the
   4963         // number of bits we shift off
   4964         NumBits = NumZeros + NumOnes - ShiftAmt;
   4965       } else {
   4966         // This is not a mask we can handle
   4967         return false;
   4968       }
   4969 
   4970       if (ShiftAmt < NumZeros) {
   4971         // Handling this case would require extra logic that would make this
   4972         // transformation non-profitable
   4973         return false;
   4974       }
   4975 
   4976       Val = AndLHS;
   4977       Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
   4978       Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
   4979     } else if (LHS->getOpcode() == ISD::SHL) {
   4980       // Here, we have a pattern like:
   4981       //
   4982       // (sra (shl val, NN), MM)
   4983       // or
   4984       // (srl (shl val, NN), MM)
   4985       //
   4986       // If MM >= NN, we can efficiently optimize this with bfe
   4987       Val = LHS->getOperand(0);
   4988 
   4989       SDValue ShlRHS = LHS->getOperand(1);
   4990       ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
   4991       if (!ShlCnst) {
   4992         // Shift amount must be constant
   4993         return false;
   4994       }
   4995       uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
   4996 
   4997       SDValue ShrRHS = RHS;
   4998       ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
   4999       if (!ShrCnst) {
   5000         // Shift amount must be constant
   5001         return false;
   5002       }
   5003       uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
   5004 
   5005       // To avoid extra codegen and be profitable, we need Outer >= Inner
   5006       if (OuterShiftAmt < InnerShiftAmt) {
   5007         return false;
   5008       }
   5009 
   5010       // If the outer shift is more than the type size, we have no bitfield to
   5011       // extract (since we also check that the inner shift is <= the outer shift
   5012       // then this also implies that the inner shift is < the type size)
   5013       if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
   5014         return false;
   5015       }
   5016 
   5017       Start =
   5018         CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
   5019       Len =
   5020         CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
   5021                                   OuterShiftAmt, DL, MVT::i32);
   5022 
   5023       if (N->getOpcode() == ISD::SRA) {
   5024         // If we have a arithmetic right shift, we need to use the signed bfe
   5025         // variant
   5026         IsSigned = true;
   5027       }
   5028     } else {
   5029       // No can do...
   5030       return false;
   5031     }
   5032   } else {
   5033     // No can do...
   5034     return false;
   5035   }
   5036 
   5037 
   5038   unsigned Opc;
   5039   // For the BFE operations we form here from "and" and "srl", always use the
   5040   // unsigned variants.
   5041   if (Val.getValueType() == MVT::i32) {
   5042     if (IsSigned) {
   5043       Opc = NVPTX::BFE_S32rii;
   5044     } else {
   5045       Opc = NVPTX::BFE_U32rii;
   5046     }
   5047   } else if (Val.getValueType() == MVT::i64) {
   5048     if (IsSigned) {
   5049       Opc = NVPTX::BFE_S64rii;
   5050     } else {
   5051       Opc = NVPTX::BFE_U64rii;
   5052     }
   5053   } else {
   5054     // We cannot handle this type
   5055     return false;
   5056   }
   5057 
   5058   SDValue Ops[] = {
   5059     Val, Start, Len
   5060   };
   5061 
   5062   ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
   5063   return true;
   5064 }
   5065 
   5066 // SelectDirectAddr - Match a direct address for DAG.
   5067 // A direct address could be a globaladdress or externalsymbol.
   5068 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   5069   // Return true if TGA or ES.
   5070   if (N.getOpcode() == ISD::TargetGlobalAddress ||
   5071       N.getOpcode() == ISD::TargetExternalSymbol) {
   5072     Address = N;
   5073     return true;
   5074   }
   5075   if (N.getOpcode() == NVPTXISD::Wrapper) {
   5076     Address = N.getOperand(0);
   5077     return true;
   5078   }
   5079   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
   5080     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
   5081     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
   5082       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
   5083         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
   5084   }
   5085   return false;
   5086 }
   5087 
   5088 // symbol+offset
   5089 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
   5090     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   5091   if (Addr.getOpcode() == ISD::ADD) {
   5092     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   5093       SDValue base = Addr.getOperand(0);
   5094       if (SelectDirectAddr(base, Base)) {
   5095         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
   5096                                            mvt);
   5097         return true;
   5098       }
   5099     }
   5100   }
   5101   return false;
   5102 }
   5103 
   5104 // symbol+offset
   5105 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
   5106                                      SDValue &Base, SDValue &Offset) {
   5107   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
   5108 }
   5109 
   5110 // symbol+offset
   5111 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
   5112                                        SDValue &Base, SDValue &Offset) {
   5113   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
   5114 }
   5115 
   5116 // register+offset
   5117 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
   5118     SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   5119   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
   5120     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   5121     Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
   5122     return true;
   5123   }
   5124   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
   5125       Addr.getOpcode() == ISD::TargetGlobalAddress)
   5126     return false; // direct calls.
   5127 
   5128   if (Addr.getOpcode() == ISD::ADD) {
   5129     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
   5130       return false;
   5131     }
   5132     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   5133       if (FrameIndexSDNode *FIN =
   5134               dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
   5135         // Constant offset from frame ref.
   5136         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   5137       else
   5138         Base = Addr.getOperand(0);
   5139       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
   5140                                          mvt);
   5141       return true;
   5142     }
   5143   }
   5144   return false;
   5145 }
   5146 
   5147 // register+offset
   5148 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
   5149                                      SDValue &Base, SDValue &Offset) {
   5150   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
   5151 }
   5152 
   5153 // register+offset
   5154 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   5155                                        SDValue &Base, SDValue &Offset) {
   5156   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
   5157 }
   5158 
   5159 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   5160                                                  unsigned int spN) const {
   5161   const Value *Src = nullptr;
   5162   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
   5163     if (spN == 0 && mN->getMemOperand()->getPseudoValue())
   5164       return true;
   5165     Src = mN->getMemOperand()->getValue();
   5166   }
   5167   if (!Src)
   5168     return false;
   5169   if (auto *PT = dyn_cast<PointerType>(Src->getType()))
   5170     return (PT->getAddressSpace() == spN);
   5171   return false;
   5172 }
   5173 
   5174 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   5175 /// inline asm expressions.
   5176 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   5177     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
   5178   SDValue Op0, Op1;
   5179   switch (ConstraintID) {
   5180   default:
   5181     return true;
   5182   case InlineAsm::Constraint_m: // memory
   5183     if (SelectDirectAddr(Op, Op0)) {
   5184       OutOps.push_back(Op0);
   5185       OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
   5186       return false;
   5187     }
   5188     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
   5189       OutOps.push_back(Op0);
   5190       OutOps.push_back(Op1);
   5191       return false;
   5192     }
   5193     break;
   5194   }
   5195   return true;
   5196 }
   5197 
   5198 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
   5199 /// conversion from \p SrcTy to \p DestTy.
   5200 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
   5201                                              bool IsSigned) {
   5202   switch (SrcTy.SimpleTy) {
   5203   default:
   5204     llvm_unreachable("Unhandled source type");
   5205   case MVT::i8:
   5206     switch (DestTy.SimpleTy) {
   5207     default:
   5208       llvm_unreachable("Unhandled dest type");
   5209     case MVT::i16:
   5210       return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
   5211     case MVT::i32:
   5212       return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
   5213     case MVT::i64:
   5214       return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
   5215     }
   5216   case MVT::i16:
   5217     switch (DestTy.SimpleTy) {
   5218     default:
   5219       llvm_unreachable("Unhandled dest type");
   5220     case MVT::i8:
   5221       return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
   5222     case MVT::i32:
   5223       return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
   5224     case MVT::i64:
   5225       return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
   5226     }
   5227   case MVT::i32:
   5228     switch (DestTy.SimpleTy) {
   5229     default:
   5230       llvm_unreachable("Unhandled dest type");
   5231     case MVT::i8:
   5232       return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
   5233     case MVT::i16:
   5234       return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
   5235     case MVT::i64:
   5236       return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
   5237     }
   5238   case MVT::i64:
   5239     switch (DestTy.SimpleTy) {
   5240     default:
   5241       llvm_unreachable("Unhandled dest type");
   5242     case MVT::i8:
   5243       return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
   5244     case MVT::i16:
   5245       return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
   5246     case MVT::i32:
   5247       return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
   5248     }
   5249   }
   5250 }
   5251