//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
                             " 1: do it, 2: do it aggressively)"),
                    cl::init(2));

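// Returns true for the small vector types that PTX can move as a unit
// (e.g. via ld.v2/ld.v4); these get custom LOAD/STORE lowering registered in
// the constructor below instead of being scalarized.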
static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
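/// For example, <4 x float> is returned as four f32 entries with byte offsets
/// 0, 4, 8, and 12, where ComputeValueVTs would return a single v4f32.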
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector()) {
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off +
                             j * VT.getVectorElementType().getStoreSize());
      }
    } else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions rather than generating calls to memset, memcpy, or memmove;
  // these huge store-count limits effectively disable the call fallback.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // Wide divides are _very_ slow. Try to reduce the width of the divide if
  // possible.
  addBypassSlowDiv(64, 32);
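  // With this, a 64-bit divide whose operands actually fit in 32 bits is
  // dispatched to a 32-bit divide at run time via an inserted check.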

  // By default, use the Source scheduling
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
  // FIXME: vector types should also be expanded
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support loads or stores of predicate (i1) registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // FP immediates (ConstantFP) are legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // We have some custom DAG combine patterns for these nodes
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SELECT);

  // Now deduce the register properties from the actions registered above.
  computeRegisterProperties(STI.getRegisterInfo());
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
  }
  return nullptr;
}

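// PTX has no registers for predicate (i1) vectors, so split non-unit i1
// vectors (e.g. v2i1, v4i1) and legalize each lane as a scalar i1.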
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}

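// Builds the PTX .callprototype string that indirect calls need. For an
// illustrative callee "i32 @f(i32, float*)" on a 64-bit target (@f is
// hypothetical), the emitted string looks roughly like:
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);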
std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    const ImmutableCallSite *CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = PtrVT.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}

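// Determines the alignment to declare for parameter Idx of a call. Note that
// a call through a constant cast, e.g.
//   call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
// still resolves to the underlying function once the casts are peeled away
// (@f and %p above are purely illustrative).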
unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts.  Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  auto &DL = CS->getCaller()->getParent()->getDataLayout();
  return DL.getABITypeAlignment(Ty);
}

   1052 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   1053                                        SmallVectorImpl<SDValue> &InVals) const {
   1054   SelectionDAG &DAG = CLI.DAG;
   1055   SDLoc dl = CLI.DL;
   1056   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   1057   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
   1058   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   1059   SDValue Chain = CLI.Chain;
   1060   SDValue Callee = CLI.Callee;
   1061   bool &isTailCall = CLI.IsTailCall;
   1062   ArgListTy &Args = CLI.getArgs();
   1063   Type *retTy = CLI.RetTy;
   1064   ImmutableCallSite *CS = CLI.CS;
   1065 
   1066   bool isABI = (STI.getSmVersion() >= 20);
   1067   assert(isABI && "Non-ABI compilation is not supported");
   1068   if (!isABI)
   1069     return Chain;
   1070   MachineFunction &MF = DAG.getMachineFunction();
   1071   const Function *F = MF.getFunction();
   1072   auto &DL = MF.getDataLayout();
   1073 
   1074   SDValue tempChain = Chain;
   1075   Chain = DAG.getCALLSEQ_START(Chain,
   1076                                DAG.getIntPtrConstant(uniqueCallSite, dl, true),
   1077                                dl);
   1078   SDValue InFlag = Chain.getValue(1);
   1079 
   1080   unsigned paramCount = 0;
   1081   // Args.size() and Outs.size() need not match.
   1082   // Outs.size() will be larger
   1083   //   * if there is an aggregate argument with multiple fields (each field
   1084   //     showing up separately in Outs)
   1085   //   * if there is a vector argument with more than typical vector-length
   1086   //     elements (generally if more than 4) where each vector element is
   1087   //     individually present in Outs.
   1088   // So a different index should be used for indexing into Outs/OutVals.
   1089   // See similar issue in LowerFormalArguments.
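  // For example (illustrative): a call passing (i32, <2 x float>) has
  // Args.size() == 2, but the vector is scalarized, so Outs holds three
  // entries: one i32 and two f32.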
   1090   unsigned OIdx = 0;
  // Declare the .param or .reg spaces needed to pass values
  // to the function.
   1093   for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
   1094     EVT VT = Outs[OIdx].VT;
   1095     Type *Ty = Args[i].Ty;
   1096 
   1097     if (!Outs[OIdx].Flags.isByVal()) {
   1098       if (Ty->isAggregateType()) {
   1099         // aggregate
   1100         SmallVector<EVT, 16> vtparts;
   1101         SmallVector<uint64_t, 16> Offsets;
   1102         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
   1103                            0);
   1104 
   1105         unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
   1106         // declare .param .align <align> .b8 .param<n>[<size>];
   1107         unsigned sz = DL.getTypeAllocSize(Ty);
   1108         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1109         SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
   1110                                                              MVT::i32),
   1111                                       DAG.getConstant(paramCount, dl, MVT::i32),
   1112                                       DAG.getConstant(sz, dl, MVT::i32),
   1113                                       InFlag };
   1114         Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
   1115                             DeclareParamOps);
   1116         InFlag = Chain.getValue(1);
   1117         for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
   1118           EVT elemtype = vtparts[j];
   1119           unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
   1120           if (elemtype.isInteger() && (sz < 8))
   1121             sz = 8;
   1122           SDValue StVal = OutVals[OIdx];
   1123           if (elemtype.getSizeInBits() < 16) {
   1124             StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
   1125           }
   1126           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1127           SDValue CopyParamOps[] = { Chain,
   1128                                      DAG.getConstant(paramCount, dl, MVT::i32),
   1129                                      DAG.getConstant(Offsets[j], dl, MVT::i32),
   1130                                      StVal, InFlag };
   1131           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
   1132                                           CopyParamVTs, CopyParamOps,
   1133                                           elemtype, MachinePointerInfo(),
   1134                                           ArgAlign);
   1135           InFlag = Chain.getValue(1);
   1136           ++OIdx;
   1137         }
   1138         if (vtparts.size() > 0)
   1139           --OIdx;
   1140         ++paramCount;
   1141         continue;
   1142       }
   1143       if (Ty->isVectorTy()) {
   1144         EVT ObjectVT = getValueType(DL, Ty);
   1145         unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
   1146         // declare .param .align <align> .b8 .param<n>[<size>];
   1147         unsigned sz = DL.getTypeAllocSize(Ty);
   1148         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1149         SDValue DeclareParamOps[] = { Chain,
   1150                                       DAG.getConstant(align, dl, MVT::i32),
   1151                                       DAG.getConstant(paramCount, dl, MVT::i32),
   1152                                       DAG.getConstant(sz, dl, MVT::i32),
   1153                                       InFlag };
   1154         Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
   1155                             DeclareParamOps);
   1156         InFlag = Chain.getValue(1);
   1157         unsigned NumElts = ObjectVT.getVectorNumElements();
   1158         EVT EltVT = ObjectVT.getVectorElementType();
   1159         EVT MemVT = EltVT;
   1160         bool NeedExtend = false;
   1161         if (EltVT.getSizeInBits() < 16) {
   1162           NeedExtend = true;
   1163           EltVT = MVT::i16;
   1164         }
   1165 
   1166         // V1 store
   1167         if (NumElts == 1) {
   1168           SDValue Elt = OutVals[OIdx++];
   1169           if (NeedExtend)
   1170             Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
   1171 
   1172           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1173           SDValue CopyParamOps[] = { Chain,
   1174                                      DAG.getConstant(paramCount, dl, MVT::i32),
   1175                                      DAG.getConstant(0, dl, MVT::i32), Elt,
   1176                                      InFlag };
   1177           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
   1178                                           CopyParamVTs, CopyParamOps,
   1179                                           MemVT, MachinePointerInfo());
   1180           InFlag = Chain.getValue(1);
   1181         } else if (NumElts == 2) {
   1182           SDValue Elt0 = OutVals[OIdx++];
   1183           SDValue Elt1 = OutVals[OIdx++];
   1184           if (NeedExtend) {
   1185             Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
   1186             Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
   1187           }
   1188 
   1189           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1190           SDValue CopyParamOps[] = { Chain,
   1191                                      DAG.getConstant(paramCount, dl, MVT::i32),
   1192                                      DAG.getConstant(0, dl, MVT::i32), Elt0,
   1193                                      Elt1, InFlag };
   1194           Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
   1195                                           CopyParamVTs, CopyParamOps,
   1196                                           MemVT, MachinePointerInfo());
   1197           InFlag = Chain.getValue(1);
   1198         } else {
   1199           unsigned curOffset = 0;
   1200           // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector stores.
   1206           // e.g.  4 elem => 1 st.v4
   1207           //       6 elem => 2 st.v4
   1208           //       8 elem => 2 st.v4
   1209           //      11 elem => 3 st.v4
   1210           unsigned VecSize = 4;
   1211           if (EltVT.getSizeInBits() == 64)
   1212             VecSize = 2;
   1213 
   1214           // This is potentially only part of a vector, so assume all elements
   1215           // are packed together.
   1216           unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
   1217 
   1218           for (unsigned i = 0; i < NumElts; i += VecSize) {
   1219             // Get values
   1220             SDValue StoreVal;
   1221             SmallVector<SDValue, 8> Ops;
   1222             Ops.push_back(Chain);
   1223             Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
   1224             Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
   1225 
   1226             unsigned Opc = NVPTXISD::StoreParamV2;
   1227 
   1228             StoreVal = OutVals[OIdx++];
   1229             if (NeedExtend)
   1230               StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
   1231             Ops.push_back(StoreVal);
   1232 
   1233             if (i + 1 < NumElts) {
   1234               StoreVal = OutVals[OIdx++];
   1235               if (NeedExtend)
   1236                 StoreVal =
   1237                     DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
   1238             } else {
   1239               StoreVal = DAG.getUNDEF(EltVT);
   1240             }
   1241             Ops.push_back(StoreVal);
   1242 
   1243             if (VecSize == 4) {
   1244               Opc = NVPTXISD::StoreParamV4;
   1245               if (i + 2 < NumElts) {
   1246                 StoreVal = OutVals[OIdx++];
   1247                 if (NeedExtend)
   1248                   StoreVal =
   1249                       DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
   1250               } else {
   1251                 StoreVal = DAG.getUNDEF(EltVT);
   1252               }
   1253               Ops.push_back(StoreVal);
   1254 
   1255               if (i + 3 < NumElts) {
   1256                 StoreVal = OutVals[OIdx++];
   1257                 if (NeedExtend)
   1258                   StoreVal =
   1259                       DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
   1260               } else {
   1261                 StoreVal = DAG.getUNDEF(EltVT);
   1262               }
   1263               Ops.push_back(StoreVal);
   1264             }
   1265 
   1266             Ops.push_back(InFlag);
   1267 
   1268             SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1269             Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
   1270                                             MemVT, MachinePointerInfo());
   1271             InFlag = Chain.getValue(1);
   1272             curOffset += PerStoreOffset;
   1273           }
   1274         }
   1275         ++paramCount;
   1276         --OIdx;
   1277         continue;
   1278       }
      // Plain scalar.
      // For ABI, declare .param .b<size> .param<n>;
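      // E.g. (illustrative) an i8 argument is declared as ".param .b32
      // param<n>": sub-32-bit integers are widened to 32 bits for the
      // declaration, and sub-16-bit values are extended to i16 before the
      // store below.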
   1281       unsigned sz = VT.getSizeInBits();
   1282       bool needExtend = false;
   1283       if (VT.isInteger()) {
   1284         if (sz < 16)
   1285           needExtend = true;
   1286         if (sz < 32)
   1287           sz = 32;
   1288       }
   1289       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1290       SDValue DeclareParamOps[] = { Chain,
   1291                                     DAG.getConstant(paramCount, dl, MVT::i32),
   1292                                     DAG.getConstant(sz, dl, MVT::i32),
   1293                                     DAG.getConstant(0, dl, MVT::i32), InFlag };
   1294       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
   1295                           DeclareParamOps);
   1296       InFlag = Chain.getValue(1);
   1297       SDValue OutV = OutVals[OIdx];
   1298       if (needExtend) {
        // zext/sext sub-16-bit integers (i1/i8) to i16
   1300         unsigned opc = ISD::ZERO_EXTEND;
   1301         if (Outs[OIdx].Flags.isSExt())
   1302           opc = ISD::SIGN_EXTEND;
   1303         OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
   1304       }
   1305       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1306       SDValue CopyParamOps[] = { Chain,
   1307                                  DAG.getConstant(paramCount, dl, MVT::i32),
   1308                                  DAG.getConstant(0, dl, MVT::i32), OutV,
   1309                                  InFlag };
   1310 
   1311       unsigned opcode = NVPTXISD::StoreParam;
   1312       if (Outs[OIdx].Flags.isZExt())
   1313         opcode = NVPTXISD::StoreParamU32;
   1314       else if (Outs[OIdx].Flags.isSExt())
   1315         opcode = NVPTXISD::StoreParamS32;
   1316       Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
   1317                                       VT, MachinePointerInfo());
   1318 
   1319       InFlag = Chain.getValue(1);
   1320       ++paramCount;
   1321       continue;
   1322     }
   1323     // struct or vector
   1324     SmallVector<EVT, 16> vtparts;
   1325     SmallVector<uint64_t, 16> Offsets;
   1326     auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be a pointer type");
   1328     ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
   1329                        vtparts, &Offsets, 0);
   1330 
   1331     // declare .param .align <align> .b8 .param<n>[<size>];
   1332     unsigned sz = Outs[OIdx].Flags.getByValSize();
   1333     SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1334     unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in Outs[OIdx].Flags is always set at this point,
    // so we don't need to fall back to the natural (ABI) alignment.
   1337     // See TargetLowering::LowerCallTo().
   1338     SDValue DeclareParamOps[] = {
   1339       Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
   1340       DAG.getConstant(paramCount, dl, MVT::i32),
   1341       DAG.getConstant(sz, dl, MVT::i32), InFlag
   1342     };
   1343     Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
   1344                         DeclareParamOps);
   1345     InFlag = Chain.getValue(1);
   1346     for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
   1347       EVT elemtype = vtparts[j];
   1348       int curOffset = Offsets[j];
   1349       unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
   1350       auto PtrVT = getPointerTy(DAG.getDataLayout());
   1351       SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
   1352                                     DAG.getConstant(curOffset, dl, PtrVT));
   1353       SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
   1354                                    MachinePointerInfo(), false, false, false,
   1355                                    PartAlign);
   1356       if (elemtype.getSizeInBits() < 16) {
   1357         theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
   1358       }
   1359       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1360       SDValue CopyParamOps[] = { Chain,
   1361                                  DAG.getConstant(paramCount, dl, MVT::i32),
   1362                                  DAG.getConstant(curOffset, dl, MVT::i32),
   1363                                  theVal, InFlag };
   1364       Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
   1365                                       CopyParamOps, elemtype,
   1366                                       MachinePointerInfo());
   1367 
   1368       InFlag = Chain.getValue(1);
   1369     }
   1370     ++paramCount;
   1371   }
   1372 
   1373   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
   1374   unsigned retAlignment = 0;
   1375 
   1376   // Handle Result
   1377   if (Ins.size() > 0) {
   1378     SmallVector<EVT, 16> resvtparts;
   1379     ComputeValueVTs(*this, DL, retTy, resvtparts);
   1380 
   1381     // Declare
   1382     //  .param .align 16 .b8 retval0[<size-in-bytes>], or
   1383     //  .param .b<size-in-bits> retval0
   1384     unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
   1385     // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
   1386     // these three types to match the logic in
   1387     // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
   1388     // Plus, this behavior is consistent with nvcc's.
   1389     if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
   1390         retTy->isPointerTy()) {
      // Scalars need to be at least 32 bits wide
   1392       if (resultsz < 32)
   1393         resultsz = 32;
   1394       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1395       SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
   1396                                   DAG.getConstant(resultsz, dl, MVT::i32),
   1397                                   DAG.getConstant(0, dl, MVT::i32), InFlag };
   1398       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
   1399                           DeclareRetOps);
   1400       InFlag = Chain.getValue(1);
   1401     } else {
   1402       retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
   1403       SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1404       SDValue DeclareRetOps[] = { Chain,
   1405                                   DAG.getConstant(retAlignment, dl, MVT::i32),
   1406                                   DAG.getConstant(resultsz / 8, dl, MVT::i32),
   1407                                   DAG.getConstant(0, dl, MVT::i32), InFlag };
   1408       Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
   1409                           DeclareRetOps);
   1410       InFlag = Chain.getValue(1);
   1411     }
   1412   }
   1413 
   1414   if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of
    // the form
    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the
    // call instruction.
    // The prototype is embedded in a string and used as the operand of a
    // CallPrototype SDNode, which prints out as the value of the string.
   1422     SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1423     std::string Proto =
   1424         getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
   1425     const char *ProtoStr =
   1426       nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
   1427     SDValue ProtoOps[] = {
   1428       Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
   1429     };
   1430     Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
   1431     InFlag = Chain.getValue(1);
   1432   }
   1433   // Op to just print "call"
   1434   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1435   SDValue PrintCallOps[] = {
   1436     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
   1437   };
   1438   Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
   1439                       dl, PrintCallVTs, PrintCallOps);
   1440   InFlag = Chain.getValue(1);
   1441 
   1442   // Ops to print out the function name
   1443   SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1444   SDValue CallVoidOps[] = { Chain, Callee, InFlag };
   1445   Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
   1446   InFlag = Chain.getValue(1);
   1447 
   1448   // Ops to print out the param list
   1449   SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1450   SDValue CallArgBeginOps[] = { Chain, InFlag };
   1451   Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
   1452                       CallArgBeginOps);
   1453   InFlag = Chain.getValue(1);
   1454 
   1455   for (unsigned i = 0, e = paramCount; i != e; ++i) {
   1456     unsigned opcode;
   1457     if (i == (e - 1))
   1458       opcode = NVPTXISD::LastCallArg;
   1459     else
   1460       opcode = NVPTXISD::CallArg;
   1461     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1462     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
   1463                              DAG.getConstant(i, dl, MVT::i32), InFlag };
   1464     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
   1465     InFlag = Chain.getValue(1);
   1466   }
   1467   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1468   SDValue CallArgEndOps[] = { Chain,
   1469                               DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
   1470                               InFlag };
   1471   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
   1472   InFlag = Chain.getValue(1);
   1473 
   1474   if (!Func) {
   1475     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1476     SDValue PrototypeOps[] = { Chain,
   1477                                DAG.getConstant(uniqueCallSite, dl, MVT::i32),
   1478                                InFlag };
   1479     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
   1480     InFlag = Chain.getValue(1);
   1481   }
   1482 
   1483   // Generate loads from param memory/moves from registers for result
   1484   if (Ins.size() > 0) {
   1485     if (retTy && retTy->isVectorTy()) {
   1486       EVT ObjectVT = getValueType(DL, retTy);
   1487       unsigned NumElts = ObjectVT.getVectorNumElements();
   1488       EVT EltVT = ObjectVT.getVectorElementType();
   1489       assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
   1490                                                       ObjectVT) == NumElts &&
   1491              "Vector was not scalarized");
   1492       unsigned sz = EltVT.getSizeInBits();
   1493       bool needTruncate = sz < 8;
   1494 
   1495       if (NumElts == 1) {
   1496         // Just a simple load
   1497         SmallVector<EVT, 4> LoadRetVTs;
   1498         if (EltVT == MVT::i1 || EltVT == MVT::i8) {
   1499           // If loading i1/i8 result, generate
   1500           //   load.b8 i16
   1501           //   if i1
   1502           //   trunc i16 to i1
   1503           LoadRetVTs.push_back(MVT::i16);
   1504         } else
   1505           LoadRetVTs.push_back(EltVT);
   1506         LoadRetVTs.push_back(MVT::Other);
   1507         LoadRetVTs.push_back(MVT::Glue);
   1508         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
   1509                                 DAG.getConstant(0, dl, MVT::i32), InFlag};
   1510         SDValue retval = DAG.getMemIntrinsicNode(
   1511             NVPTXISD::LoadParam, dl,
   1512             DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
   1513         Chain = retval.getValue(1);
   1514         InFlag = retval.getValue(2);
   1515         SDValue Ret0 = retval;
   1516         if (needTruncate)
   1517           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
   1518         InVals.push_back(Ret0);
   1519       } else if (NumElts == 2) {
   1520         // LoadV2
   1521         SmallVector<EVT, 4> LoadRetVTs;
   1522         if (EltVT == MVT::i1 || EltVT == MVT::i8) {
   1523           // If loading i1/i8 result, generate
   1524           //   load.b8 i16
   1525           //   if i1
   1526           //   trunc i16 to i1
   1527           LoadRetVTs.push_back(MVT::i16);
   1528           LoadRetVTs.push_back(MVT::i16);
   1529         } else {
   1530           LoadRetVTs.push_back(EltVT);
   1531           LoadRetVTs.push_back(EltVT);
   1532         }
   1533         LoadRetVTs.push_back(MVT::Other);
   1534         LoadRetVTs.push_back(MVT::Glue);
   1535         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
   1536                                 DAG.getConstant(0, dl, MVT::i32), InFlag};
   1537         SDValue retval = DAG.getMemIntrinsicNode(
   1538             NVPTXISD::LoadParamV2, dl,
   1539             DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
   1540         Chain = retval.getValue(2);
   1541         InFlag = retval.getValue(3);
   1542         SDValue Ret0 = retval.getValue(0);
   1543         SDValue Ret1 = retval.getValue(1);
   1544         if (needTruncate) {
   1545           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
   1546           InVals.push_back(Ret0);
   1547           Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
   1548           InVals.push_back(Ret1);
   1549         } else {
   1550           InVals.push_back(Ret0);
   1551           InVals.push_back(Ret1);
   1552         }
   1553       } else {
   1554         // Split into N LoadV4
   1555         unsigned Ofst = 0;
   1556         unsigned VecSize = 4;
   1557         unsigned Opc = NVPTXISD::LoadParamV4;
   1558         if (EltVT.getSizeInBits() == 64) {
   1559           VecSize = 2;
   1560           Opc = NVPTXISD::LoadParamV2;
   1561         }
   1562         EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
   1563         for (unsigned i = 0; i < NumElts; i += VecSize) {
   1564           SmallVector<EVT, 8> LoadRetVTs;
   1565           if (EltVT == MVT::i1 || EltVT == MVT::i8) {
   1566             // If loading i1/i8 result, generate
   1567             //   load.b8 i16
   1568             //   if i1
   1569             //   trunc i16 to i1
   1570             for (unsigned j = 0; j < VecSize; ++j)
   1571               LoadRetVTs.push_back(MVT::i16);
   1572           } else {
   1573             for (unsigned j = 0; j < VecSize; ++j)
   1574               LoadRetVTs.push_back(EltVT);
   1575           }
   1576           LoadRetVTs.push_back(MVT::Other);
   1577           LoadRetVTs.push_back(MVT::Glue);
   1578           SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
   1579                                   DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
   1580           SDValue retval = DAG.getMemIntrinsicNode(
   1581               Opc, dl, DAG.getVTList(LoadRetVTs),
   1582               LoadRetOps, EltVT, MachinePointerInfo());
   1583           if (VecSize == 2) {
   1584             Chain = retval.getValue(2);
   1585             InFlag = retval.getValue(3);
   1586           } else {
   1587             Chain = retval.getValue(4);
   1588             InFlag = retval.getValue(5);
   1589           }
   1590 
   1591           for (unsigned j = 0; j < VecSize; ++j) {
   1592             if (i + j >= NumElts)
   1593               break;
   1594             SDValue Elt = retval.getValue(j);
   1595             if (needTruncate)
   1596               Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
   1597             InVals.push_back(Elt);
   1598           }
   1599           Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
   1600         }
   1601       }
   1602     } else {
   1603       SmallVector<EVT, 16> VTs;
   1604       SmallVector<uint64_t, 16> Offsets;
   1605       ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
   1606       assert(VTs.size() == Ins.size() && "Bad value decomposition");
   1607       unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
   1608       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
   1609         unsigned sz = VTs[i].getSizeInBits();
   1610         unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
   1611         bool needTruncate = sz < 8;
   1612         if (VTs[i].isInteger() && (sz < 8))
   1613           sz = 8;
   1614 
   1615         SmallVector<EVT, 4> LoadRetVTs;
   1616         EVT TheLoadType = VTs[i];
   1617         if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
   1618           // This is for integer types only, and specifically not for
   1619           // aggregates.
   1620           LoadRetVTs.push_back(MVT::i32);
   1621           TheLoadType = MVT::i32;
   1622         } else if (sz < 16) {
   1623           // If loading i1/i8 result, generate
   1624           //   load i8 (-> i16)
   1625           //   trunc i16 to i1/i8
   1626           LoadRetVTs.push_back(MVT::i16);
   1627         } else
   1628           LoadRetVTs.push_back(Ins[i].VT);
   1629         LoadRetVTs.push_back(MVT::Other);
   1630         LoadRetVTs.push_back(MVT::Glue);
   1631 
   1632         SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
   1633                                 DAG.getConstant(Offsets[i], dl, MVT::i32),
   1634                                 InFlag};
   1635         SDValue retval = DAG.getMemIntrinsicNode(
   1636             NVPTXISD::LoadParam, dl,
   1637             DAG.getVTList(LoadRetVTs), LoadRetOps,
   1638             TheLoadType, MachinePointerInfo(), AlignI);
   1639         Chain = retval.getValue(1);
   1640         InFlag = retval.getValue(2);
   1641         SDValue Ret0 = retval.getValue(0);
   1642         if (needTruncate)
   1643           Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
   1644         InVals.push_back(Ret0);
   1645       }
   1646     }
   1647   }
   1648 
   1649   Chain = DAG.getCALLSEQ_END(Chain,
   1650                              DAG.getIntPtrConstant(uniqueCallSite, dl, true),
   1651                              DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
   1652                                                    true),
   1653                              InFlag, dl);
   1654   uniqueCallSite++;
   1655 
  // Set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX.
   1658   isTailCall = false;
   1659   return Chain;
   1660 }
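
// Putting the above sequence together: a direct call to "float f(float x)"
// (an illustrative signature, not taken from this file) is printed roughly as
//   .param .b32 param0;
//   st.param.b32 [param0], %f1;
//   .param .b32 retval0;
//   call.uni (retval0), f, (param0);
//   ld.param.b32 %f2, [retval0];
// with register names made up for the example.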
   1661 
   1662 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
   1663 // (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build-vector nodes instead, just as LegalizeOp() did
// in LLVM 2.5.
   1665 SDValue
   1666 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   1667   SDNode *Node = Op.getNode();
   1668   SDLoc dl(Node);
   1669   SmallVector<SDValue, 8> Ops;
   1670   unsigned NumOperands = Node->getNumOperands();
   1671   for (unsigned i = 0; i < NumOperands; ++i) {
   1672     SDValue SubOp = Node->getOperand(i);
   1673     EVT VVT = SubOp.getNode()->getValueType(0);
   1674     EVT EltVT = VVT.getVectorElementType();
   1675     unsigned NumSubElem = VVT.getVectorNumElements();
   1676     for (unsigned j = 0; j < NumSubElem; ++j) {
   1677       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
   1678                                 DAG.getIntPtrConstant(j, dl)));
   1679     }
   1680   }
   1681   return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
   1682 }
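
// Illustrative example of the expansion above: concatenating two v2f32
// operands yields a BUILD_VECTOR of the four f32 elements obtained via
// EXTRACT_VECTOR_ELT, avoiding the stack-based default expansion.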
   1683 
/// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which
/// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
///    amount, or
/// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
///    amount.
   1689 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
   1690                                                   SelectionDAG &DAG) const {
   1691   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   1692   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
   1693 
   1694   EVT VT = Op.getValueType();
   1695   unsigned VTBits = VT.getSizeInBits();
   1696   SDLoc dl(Op);
   1697   SDValue ShOpLo = Op.getOperand(0);
   1698   SDValue ShOpHi = Op.getOperand(1);
   1699   SDValue ShAmt  = Op.getOperand(2);
   1700   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
   1701 
   1702   if (VTBits == 32 && STI.getSmVersion() >= 35) {
   1703 
    // For 32-bit shifts on sm_35 and later, we can use the funnel shift 'shf'
    // instruction.
   1705     // {dHi, dLo} = {aHi, aLo} >> Amt
   1706     //   dHi = aHi >> Amt
   1707     //   dLo = shf.r.clamp aLo, aHi, Amt
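    // In PTX this lowers to something like (illustrative register names):
    //   shr.u32         %dHi, %aHi, %amt;       // shr.s32 for SRA_PARTS
    //   shf.r.clamp.b32 %dLo, %aLo, %aHi, %amt;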
   1708 
   1709     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   1710     SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
   1711                              ShAmt);
   1712 
   1713     SDValue Ops[2] = { Lo, Hi };
   1714     return DAG.getMergeValues(Ops, dl);
  } else {
   1717 
   1718     // {dHi, dLo} = {aHi, aLo} >> Amt
   1719     // - if (Amt>=size) then
   1720     //      dLo = aHi >> (Amt-size)
   1721     //      dHi = aHi >> Amt (this is either all 0 or all 1)
   1722     //   else
   1723     //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
   1724     //      dHi = aHi >> Amt
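    // For example (illustrative): with VTBits == 32 and Amt == 40, Amt >=
    // size, so dLo = aHi >> 8, and dHi = aHi >> Amt is all zeros (SRL) or
    // all copies of the sign bit (SRA).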
   1725 
   1726     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   1727                                    DAG.getConstant(VTBits, dl, MVT::i32),
   1728                                    ShAmt);
   1729     SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
   1730     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   1731                                      DAG.getConstant(VTBits, dl, MVT::i32));
   1732     SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
   1733     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   1734     SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   1735 
   1736     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
   1737                                DAG.getConstant(VTBits, dl, MVT::i32),
   1738                                ISD::SETGE);
   1739     SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   1740     SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
   1741 
   1742     SDValue Ops[2] = { Lo, Hi };
   1743     return DAG.getMergeValues(Ops, dl);
   1744   }
   1745 }
   1746 
/// LowerShiftLeftParts - Lower SHL_PARTS, which
/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
///    amount, or
/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
///    amount.
   1752 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   1753                                                  SelectionDAG &DAG) const {
   1754   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   1755   assert(Op.getOpcode() == ISD::SHL_PARTS);
   1756 
   1757   EVT VT = Op.getValueType();
   1758   unsigned VTBits = VT.getSizeInBits();
   1759   SDLoc dl(Op);
   1760   SDValue ShOpLo = Op.getOperand(0);
   1761   SDValue ShOpHi = Op.getOperand(1);
   1762   SDValue ShAmt  = Op.getOperand(2);
   1763 
   1764   if (VTBits == 32 && STI.getSmVersion() >= 35) {
   1765 
    // For 32-bit shifts on sm_35 and later, we can use the funnel shift 'shf'
    // instruction.
   1767     // {dHi, dLo} = {aHi, aLo} << Amt
   1768     //   dHi = shf.l.clamp aLo, aHi, Amt
   1769     //   dLo = aLo << Amt
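    // In PTX this lowers to something like (illustrative register names):
    //   shf.l.clamp.b32 %dHi, %aLo, %aHi, %amt;
    //   shl.b32         %dLo, %aLo, %amt;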
   1770 
   1771     SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
   1772                              ShAmt);
   1773     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   1774 
   1775     SDValue Ops[2] = { Lo, Hi };
   1776     return DAG.getMergeValues(Ops, dl);
  } else {
   1779 
   1780     // {dHi, dLo} = {aHi, aLo} << Amt
   1781     // - if (Amt>=size) then
    //      dLo = aLo << Amt (all 0)
    //      dHi = aLo << (Amt-size)
   1784     //   else
   1785     //      dLo = aLo << Amt
   1786     //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
   1787 
   1788     SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   1789                                    DAG.getConstant(VTBits, dl, MVT::i32),
   1790                                    ShAmt);
   1791     SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
   1792     SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   1793                                      DAG.getConstant(VTBits, dl, MVT::i32));
   1794     SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
   1795     SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   1796     SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   1797 
   1798     SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
   1799                                DAG.getConstant(VTBits, dl, MVT::i32),
   1800                                ISD::SETGE);
   1801     SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   1802     SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
   1803 
   1804     SDValue Ops[2] = { Lo, Hi };
   1805     return DAG.getMergeValues(Ops, dl);
   1806   }
   1807 }
   1808 
   1809 SDValue
   1810 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   1811   switch (Op.getOpcode()) {
   1812   case ISD::RETURNADDR:
   1813     return SDValue();
   1814   case ISD::FRAMEADDR:
   1815     return SDValue();
   1816   case ISD::GlobalAddress:
   1817     return LowerGlobalAddress(Op, DAG);
   1818   case ISD::INTRINSIC_W_CHAIN:
   1819     return Op;
   1820   case ISD::BUILD_VECTOR:
   1821   case ISD::EXTRACT_SUBVECTOR:
   1822     return Op;
   1823   case ISD::CONCAT_VECTORS:
   1824     return LowerCONCAT_VECTORS(Op, DAG);
   1825   case ISD::STORE:
   1826     return LowerSTORE(Op, DAG);
   1827   case ISD::LOAD:
   1828     return LowerLOAD(Op, DAG);
   1829   case ISD::SHL_PARTS:
   1830     return LowerShiftLeftParts(Op, DAG);
   1831   case ISD::SRA_PARTS:
   1832   case ISD::SRL_PARTS:
   1833     return LowerShiftRightParts(Op, DAG);
   1834   case ISD::SELECT:
   1835     return LowerSelect(Op, DAG);
   1836   default:
   1837     llvm_unreachable("Custom lowering not defined for operation");
   1838   }
   1839 }
   1840 
   1841 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
   1842   SDValue Op0 = Op->getOperand(0);
   1843   SDValue Op1 = Op->getOperand(1);
   1844   SDValue Op2 = Op->getOperand(2);
   1845   SDLoc DL(Op.getNode());
   1846 
   1847   assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
   1848 
   1849   Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
   1850   Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
   1851   SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
   1852   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
   1853 
   1854   return Trunc;
   1855 }
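
// Illustrative PTX for the i1 select above (made-up register names): the
// operands are widened to 32 bits so the select becomes
//   selp.b32 %r1, %r2, %r3, %p1;
// and the result is truncated back to i1.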
   1856 
   1857 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   1858   if (Op.getValueType() == MVT::i1)
   1859     return LowerLOADi1(Op, DAG);
   1860   else
   1861     return SDValue();
   1862 }
   1863 
   1864 // v = ld i1* addr
   1865 //   =>
   1866 // v1 = ld i8* addr (-> i16)
   1867 // v = trunc i16 to i1
   1868 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   1869   SDNode *Node = Op.getNode();
   1870   LoadSDNode *LD = cast<LoadSDNode>(Node);
   1871   SDLoc dl(Node);
   1872   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
   1873   assert(Node->getValueType(0) == MVT::i1 &&
   1874          "Custom lowering for i1 load only");
   1875   SDValue newLD =
   1876       DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
   1877                   LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
   1878                   LD->isInvariant(), LD->getAlignment());
   1879   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
   1880   // The legalizer (the caller) is expecting two values from the legalized
   1881   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
   1882   // in LegalizeDAG.cpp which also uses MergeValues.
   1883   SDValue Ops[] = { result, LD->getChain() };
   1884   return DAG.getMergeValues(Ops, dl);
   1885 }
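
// Illustratively, "v = ld i1* addr" (see the comment above) becomes a byte
// load such as "ld.u8 %rs1, [%rd1];" into a 16-bit register, and the i1
// value is then a truncate of that register.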
   1886 
   1887 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   1888   EVT ValVT = Op.getOperand(1).getValueType();
   1889   if (ValVT == MVT::i1)
   1890     return LowerSTOREi1(Op, DAG);
   1891   else if (ValVT.isVector())
   1892     return LowerSTOREVector(Op, DAG);
   1893   else
   1894     return SDValue();
   1895 }
   1896 
   1897 SDValue
   1898 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
   1899   SDNode *N = Op.getNode();
   1900   SDValue Val = N->getOperand(1);
   1901   SDLoc DL(N);
   1902   EVT ValVT = Val.getValueType();
   1903 
   1904   if (ValVT.isVector()) {
   1905     // We only handle "native" vector sizes for now, e.g. <4 x double> is not
   1906     // legal.  We can (and should) split that into 2 stores of <2 x double> here
   1907     // but I'm leaving that as a TODO for now.
   1908     if (!ValVT.isSimple())
   1909       return SDValue();
   1910     switch (ValVT.getSimpleVT().SimpleTy) {
   1911     default:
   1912       return SDValue();
   1913     case MVT::v2i8:
   1914     case MVT::v2i16:
   1915     case MVT::v2i32:
   1916     case MVT::v2i64:
   1917     case MVT::v2f32:
   1918     case MVT::v2f64:
   1919     case MVT::v4i8:
   1920     case MVT::v4i16:
   1921     case MVT::v4i32:
   1922     case MVT::v4f32:
   1923       // This is a "native" vector type
   1924       break;
   1925     }
   1926 
   1927     MemSDNode *MemSD = cast<MemSDNode>(N);
   1928     const DataLayout &TD = DAG.getDataLayout();
   1929 
   1930     unsigned Align = MemSD->getAlignment();
   1931     unsigned PrefAlign =
   1932         TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
   1933     if (Align < PrefAlign) {
   1934       // This store is not sufficiently aligned, so bail out and let this vector
   1935       // store be scalarized.  Note that we may still be able to emit smaller
   1936       // vector stores.  For example, if we are storing a <4 x float> with an
   1937       // alignment of 8, this check will fail but the legalizer will try again
   1938       // with 2 x <2 x float>, which will succeed with an alignment of 8.
   1939       return SDValue();
   1940     }
   1941 
   1942     unsigned Opcode = 0;
   1943     EVT EltVT = ValVT.getVectorElementType();
   1944     unsigned NumElts = ValVT.getVectorNumElements();
   1945 
   1946     // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
   1947     // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
   1948     // stored type to i16 and propagate the "real" type as the memory type.
   1949     bool NeedExt = false;
   1950     if (EltVT.getSizeInBits() < 16)
   1951       NeedExt = true;
   1952 
   1953     switch (NumElts) {
   1954     default:
   1955       return SDValue();
   1956     case 2:
   1957       Opcode = NVPTXISD::StoreV2;
   1958       break;
    case 4:
      Opcode = NVPTXISD::StoreV4;
      break;
   1963     }
   1964 
   1965     SmallVector<SDValue, 8> Ops;
   1966 
   1967     // First is the chain
   1968     Ops.push_back(N->getOperand(0));
   1969 
   1970     // Then the split values
   1971     for (unsigned i = 0; i < NumElts; ++i) {
   1972       SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
   1973                                    DAG.getIntPtrConstant(i, DL));
   1974       if (NeedExt)
   1975         ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
   1976       Ops.push_back(ExtVal);
   1977     }
   1978 
   1979     // Then any remaining arguments
   1980     Ops.append(N->op_begin() + 2, N->op_end());
   1981 
   1982     SDValue NewSt = DAG.getMemIntrinsicNode(
   1983         Opcode, DL, DAG.getVTList(MVT::Other), Ops,
   1984         MemSD->getMemoryVT(), MemSD->getMemOperand());
   1985 
   1987     return NewSt;
   1988   }
   1989 
   1990   return SDValue();
   1991 }
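
// Illustrative result of the lowering above: a sufficiently aligned store of
// <4 x float> becomes a single NVPTXISD::StoreV4 node and is eventually
// emitted as something like
//   st.v4.f32 [%rd1], {%f1, %f2, %f3, %f4};
// (register names made up).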
   1992 
   1993 // st i1 v, addr
   1994 //    =>
   1995 // v1 = zxt v to i16
   1996 // st.u8 i16, addr
   1997 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   1998   SDNode *Node = Op.getNode();
   1999   SDLoc dl(Node);
   2000   StoreSDNode *ST = cast<StoreSDNode>(Node);
   2001   SDValue Tmp1 = ST->getChain();
   2002   SDValue Tmp2 = ST->getBasePtr();
   2003   SDValue Tmp3 = ST->getValue();
   2004   assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
   2005   unsigned Alignment = ST->getAlignment();
   2006   bool isVolatile = ST->isVolatile();
   2007   bool isNonTemporal = ST->isNonTemporal();
   2008   Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
   2009   SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
   2010                                      ST->getPointerInfo(), MVT::i8, isNonTemporal,
   2011                                      isVolatile, Alignment);
   2012   return Result;
   2013 }
   2014 
   2015 SDValue
   2016 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
   2017   std::string ParamSym;
   2018   raw_string_ostream ParamStr(ParamSym);
   2019 
   2020   ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
   2021   ParamStr.flush();
   2022 
   2023   std::string *SavedStr =
   2024     nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
   2025   return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
   2026 }
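
// For example, for a function named "foo", getParamSymbol(DAG, 1, ...) yields
// the external symbol "foo_param_1", which matches the name the asm printer
// gives the corresponding .param declaration.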
   2027 
   2028 // Check to see if the kernel argument is image*_t or sampler_t
   2029 
   2030 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
   2031   static const char *const specialTypes[] = { "struct._image2d_t",
   2032                                               "struct._image3d_t",
   2033                                               "struct._sampler_t" };
   2034 
   2035   Type *Ty = arg->getType();
   2036   auto *PTy = dyn_cast<PointerType>(Ty);
   2037 
   2038   if (!PTy)
   2039     return false;
   2040 
   2041   if (!context)
   2042     return false;
   2043 
   2044   auto *STy = dyn_cast<StructType>(PTy->getElementType());
   2045   const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
   2046 
   2047   return std::find(std::begin(specialTypes), std::end(specialTypes),
   2048                    TypeName) != std::end(specialTypes);
   2049 }
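
// For example, a kernel argument of IR type %struct._image2d_t* (or the
// _image3d_t / _sampler_t equivalents) is recognized here; any other pointer
// type is not.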
   2050 
   2051 SDValue NVPTXTargetLowering::LowerFormalArguments(
   2052     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
   2053     const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
   2054     SmallVectorImpl<SDValue> &InVals) const {
   2055   MachineFunction &MF = DAG.getMachineFunction();
   2056   const DataLayout &DL = DAG.getDataLayout();
   2057   auto PtrVT = getPointerTy(DAG.getDataLayout());
   2058 
   2059   const Function *F = MF.getFunction();
   2060   const AttributeSet &PAL = F->getAttributes();
   2061   const TargetLowering *TLI = STI.getTargetLowering();
   2062 
   2063   SDValue Root = DAG.getRoot();
   2064   std::vector<SDValue> OutChains;
   2065 
   2066   bool isKernel = llvm::isKernelFunction(*F);
   2067   bool isABI = (STI.getSmVersion() >= 20);
   2068   assert(isABI && "Non-ABI compilation is not supported");
   2069   if (!isABI)
   2070     return Chain;
   2071 
   2072   std::vector<Type *> argTypes;
   2073   std::vector<const Argument *> theArgs;
   2074   for (const Argument &I : F->args()) {
   2075     theArgs.push_back(&I);
   2076     argTypes.push_back(I.getType());
   2077   }
   2078   // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
   2079   // Ins.size() will be larger
   2080   //   * if there is an aggregate argument with multiple fields (each field
   2081   //     showing up separately in Ins)
   2082   //   * if there is a vector argument with more than typical vector-length
   2083   //     elements (generally if more than 4) where each vector element is
   2084   //     individually present in Ins.
   2085   // So a different index should be used for indexing into Ins.
   2086   // See similar issue in LowerCall.
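  // For example (illustrative): a kernel taking (i32, <2 x float>) has
  // theArgs.size() == 2 but Ins.size() == 3 once the vector is scalarized.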
   2087   unsigned InsIdx = 0;
   2088 
   2089   int idx = 0;
   2090   for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
   2091     Type *Ty = argTypes[i];
   2092 
    // If the kernel argument is an image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
   2096     if (isImageOrSamplerVal(
   2097             theArgs[i],
   2098             (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
   2099                                      : nullptr))) {
   2100       assert(isKernel && "Only kernels can have image/sampler params");
   2101       InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
   2102       continue;
   2103     }
   2104 
   2105     if (theArgs[i]->use_empty()) {
   2106       // argument is dead
   2107       if (Ty->isAggregateType()) {
   2108         SmallVector<EVT, 16> vtparts;
   2109 
   2110         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
   2111         assert(vtparts.size() > 0 && "empty aggregate type not expected");
   2112         for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
   2113              ++parti) {
   2114           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
   2115           ++InsIdx;
   2116         }
   2117         if (vtparts.size() > 0)
   2118           --InsIdx;
   2119         continue;
   2120       }
   2121       if (Ty->isVectorTy()) {
   2122         EVT ObjectVT = getValueType(DL, Ty);
   2123         unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
   2124         for (unsigned parti = 0; parti < NumRegs; ++parti) {
   2125           InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
   2126           ++InsIdx;
   2127         }
   2128         if (NumRegs > 0)
   2129           --InsIdx;
   2130         continue;
   2131       }
   2132       InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
   2133       continue;
   2134     }
   2135 
    // In the following cases, assign a node order of "idx+1" to newly
    // created nodes. The SDNodes for params have to appear in the same
    // order as the params appear in the original function; "idx+1" holds
    // that order.
   2140     if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
   2141       if (Ty->isAggregateType()) {
   2142         SmallVector<EVT, 16> vtparts;
   2143         SmallVector<uint64_t, 16> offsets;
   2144 
   2145         // NOTE: Here, we lose the ability to issue vector loads for vectors
   2146         // that are a part of a struct.  This should be investigated in the
   2147         // future.
   2148         ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
   2149                            0);
   2150         assert(vtparts.size() > 0 && "empty aggregate type not expected");
   2151         bool aggregateIsPacked = false;
   2152         if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
   2153           aggregateIsPacked = STy->isPacked();
   2154 
   2155         SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
   2156         for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
   2157              ++parti) {
   2158           EVT partVT = vtparts[parti];
   2159           Value *srcValue = Constant::getNullValue(
   2160               PointerType::get(partVT.getTypeForEVT(F->getContext()),
   2161                                llvm::ADDRESS_SPACE_PARAM));
   2162           SDValue srcAddr =
   2163               DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
   2164                           DAG.getConstant(offsets[parti], dl, PtrVT));
   2165           unsigned partAlign = aggregateIsPacked
   2166                                    ? 1
   2167                                    : DL.getABITypeAlignment(
   2168                                          partVT.getTypeForEVT(F->getContext()));
   2169           SDValue p;
   2170           if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
   2171             ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
   2172                                      ISD::SEXTLOAD : ISD::ZEXTLOAD;
   2173             p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
   2174                                MachinePointerInfo(srcValue), partVT, false,
   2175                                false, false, partAlign);
   2176           } else {
   2177             p = DAG.getLoad(partVT, dl, Root, srcAddr,
   2178                             MachinePointerInfo(srcValue), false, false, false,
   2179                             partAlign);
   2180           }
   2181           if (p.getNode())
   2182             p.getNode()->setIROrder(idx + 1);
   2183           InVals.push_back(p);
   2184           ++InsIdx;
   2185         }
   2186         if (vtparts.size() > 0)
   2187           --InsIdx;
   2188         continue;
   2189       }
   2190       if (Ty->isVectorTy()) {
   2191         EVT ObjectVT = getValueType(DL, Ty);
   2192         SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
   2193         unsigned NumElts = ObjectVT.getVectorNumElements();
   2194         assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
   2195                "Vector was not scalarized");
   2196         EVT EltVT = ObjectVT.getVectorElementType();
   2197 
   2198         // V1 load
   2199         // f32 = load ...
   2200         if (NumElts == 1) {
   2201           // We only have one element, so just directly load it
   2202           Value *SrcValue = Constant::getNullValue(PointerType::get(
   2203               EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
   2204           SDValue P = DAG.getLoad(
   2205               EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
   2206               true,
   2207               DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
   2208           if (P.getNode())
   2209             P.getNode()->setIROrder(idx + 1);
   2210 
   2211           if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
   2212             P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
   2213           InVals.push_back(P);
   2214           ++InsIdx;
   2215         } else if (NumElts == 2) {
   2216           // V2 load
   2217           // f32,f32 = load ...
   2218           EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
   2219           Value *SrcValue = Constant::getNullValue(PointerType::get(
   2220               VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
   2221           SDValue P = DAG.getLoad(
   2222               VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
   2223               true,
   2224               DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
   2225           if (P.getNode())
   2226             P.getNode()->setIROrder(idx + 1);
   2227 
   2228           SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
   2229                                      DAG.getIntPtrConstant(0, dl));
   2230           SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
   2231                                      DAG.getIntPtrConstant(1, dl));
   2232 
   2233           if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
   2234             Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
   2235             Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
   2236           }
   2237 
   2238           InVals.push_back(Elt0);
   2239           InVals.push_back(Elt1);
   2240           InsIdx += 2;
   2241         } else {
   2242           // V4 loads
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector loads.
   2248           // e.g.  4 elem => 1 ld.v4
   2249           //       6 elem => 2 ld.v4
   2250           //       8 elem => 2 ld.v4
   2251           //      11 elem => 3 ld.v4
   2252           unsigned VecSize = 4;
   2253           if (EltVT.getSizeInBits() == 64) {
   2254             VecSize = 2;
   2255           }
   2256           EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
   2257           unsigned Ofst = 0;
   2258           for (unsigned i = 0; i < NumElts; i += VecSize) {
   2259             Value *SrcValue = Constant::getNullValue(
   2260                 PointerType::get(VecVT.getTypeForEVT(F->getContext()),
   2261                                  llvm::ADDRESS_SPACE_PARAM));
   2262             SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
   2263                                           DAG.getConstant(Ofst, dl, PtrVT));
   2264             SDValue P = DAG.getLoad(
   2265                 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
   2266                 false, true,
   2267                 DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
   2268             if (P.getNode())
   2269               P.getNode()->setIROrder(idx + 1);
   2270 
   2271             for (unsigned j = 0; j < VecSize; ++j) {
   2272               if (i + j >= NumElts)
   2273                 break;
   2274               SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
   2275                                         DAG.getIntPtrConstant(j, dl));
   2276               if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
   2277                 Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
   2278               InVals.push_back(Elt);
   2279             }
   2280             Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
   2281           }
   2282           InsIdx += NumElts;
   2283         }
   2284 
   2285         if (NumElts > 0)
   2286           --InsIdx;
   2287         continue;
   2288       }
   2289       // A plain scalar.
   2290       EVT ObjectVT = getValueType(DL, Ty);
   2291       // If ABI, load from the param symbol
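      // A scalar narrower than its Ins register type (e.g. a signext i8
      // argument, as a sketch) takes the extending-load path below; other
      // scalars are loaded directly.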
   2292       SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
   2293       Value *srcValue = Constant::getNullValue(PointerType::get(
   2294           ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
   2295       SDValue p;
    2296       if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
   2297         ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
   2298                                        ISD::SEXTLOAD : ISD::ZEXTLOAD;
   2299         p = DAG.getExtLoad(
   2300             ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
   2301             ObjectVT, false, false, false,
   2302             DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
   2303       } else {
   2304         p = DAG.getLoad(
   2305             Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false,
   2306             false, false,
   2307             DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
   2308       }
   2309       if (p.getNode())
   2310         p.getNode()->setIROrder(idx + 1);
   2311       InVals.push_back(p);
   2312       continue;
   2313     }
   2314 
    2315     // Param has the ByVal attribute.
    2316     // Return MoveParam(param symbol).
    2317     // Ideally, the param symbol could be returned directly,
    2318     // but when the SDNode builder decides to use it in a CopyToReg(),
    2319     // the machine instruction fails because a TargetExternalSymbol
    2320     // (which has not been lowered) is target dependent, and CopyToReg
    2321     // assumes its source is already lowered.
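    // Sketch of the DAG built below for a non-kernel function (names assumed):
    //   %p  = MoveParam <param symbol>
    //   %p2 = INTRINSIC_WO_CHAIN(nvvm_ptr_local_to_gen, %p)
    // so the byval pointer reaches the body in the generic address space.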
   2322     EVT ObjectVT = getValueType(DL, Ty);
   2323     assert(ObjectVT == Ins[InsIdx].VT &&
   2324            "Ins type did not match function type");
   2325     SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
   2326     SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
   2327     if (p.getNode())
   2328       p.getNode()->setIROrder(idx + 1);
   2329     if (isKernel)
   2330       InVals.push_back(p);
   2331     else {
   2332       SDValue p2 = DAG.getNode(
   2333           ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
   2334           DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
   2335       InVals.push_back(p2);
   2336     }
   2337   }
   2338 
    2339   // Clang checks for explicit varargs and issues an error if any are
    2340   // present. However, Clang lets code with an implicit vararg list, like
    2341   // f(), pass. See bug 617733. We treat this case as if the arg list
    2342   // is empty.
    2343   // if (F.isVarArg()) {
    2344   //   assert(0 && "VarArg not supported yet!");
    2345   // }
   2346 
   2347   if (!OutChains.empty())
   2348     DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
   2349 
   2350   return Chain;
   2351 }
    2352
   2354 SDValue
   2355 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   2356                                  bool isVarArg,
   2357                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
   2358                                  const SmallVectorImpl<SDValue> &OutVals,
   2359                                  SDLoc dl, SelectionDAG &DAG) const {
   2360   MachineFunction &MF = DAG.getMachineFunction();
   2361   const Function *F = MF.getFunction();
   2362   Type *RetTy = F->getReturnType();
   2363   const DataLayout &TD = DAG.getDataLayout();
   2364 
   2365   bool isABI = (STI.getSmVersion() >= 20);
   2366   assert(isABI && "Non-ABI compilation is not supported");
   2367   if (!isABI)
   2368     return Chain;
   2369 
   2370   if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    2371     // If we have a vector type, the OutVals array will be the scalarized
    2372     // components, and we have to combine them into one or more vector stores.
   2373     unsigned NumElts = VTy->getNumElements();
   2374     assert(NumElts == Outs.size() && "Bad scalarization of return value");
   2375 
   2377     EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
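    // PTX has no registers narrower than 16 bits, so sub-i16 elements must
    // be zero-extended to i16 before the retval stores below.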
    2378     bool NeedExtend = EltVT.getSizeInBits() < 16;
   2381 
   2382     // V1 store
   2383     if (NumElts == 1) {
   2384       SDValue StoreVal = OutVals[0];
   2385       // We only have one element, so just directly store it
   2386       if (NeedExtend)
   2387         StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
   2388       SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
   2389       Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
   2390                                       DAG.getVTList(MVT::Other), Ops,
   2391                                       EltVT, MachinePointerInfo());
   2392 
   2393     } else if (NumElts == 2) {
   2394       // V2 store
   2395       SDValue StoreVal0 = OutVals[0];
   2396       SDValue StoreVal1 = OutVals[1];
   2397 
   2398       if (NeedExtend) {
   2399         StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
   2400         StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
   2401       }
   2402 
   2403       SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
   2404                         StoreVal1 };
   2405       Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
   2406                                       DAG.getVTList(MVT::Other), Ops,
   2407                                       EltVT, MachinePointerInfo());
   2408     } else {
   2409       // V4 stores
   2410       // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
   2411       // vector will be expanded to a power of 2 elements, so we know we can
   2412       // always round up to the next multiple of 4 when creating the vector
   2413       // stores.
   2414       // e.g.  4 elem => 1 st.v4
   2415       //       6 elem => 2 st.v4
   2416       //       8 elem => 2 st.v4
   2417       //      11 elem => 3 st.v4
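      // As a sketch (assuming a hypothetical function returning <6 x float>),
      // the loop below would emit two stores, padding the tail with undef:
      //   st.param.v4.f32 [func_retval0+0],  {%f1, %f2, %f3, %f4};
      //   st.param.v4.f32 [func_retval0+16], {%f5, %f6, %fu, %fu};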
   2418 
   2419       unsigned VecSize = 4;
   2420       if (OutVals[0].getValueType().getSizeInBits() == 64)
   2421         VecSize = 2;
   2422 
   2423       unsigned Offset = 0;
   2424 
    2425       EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
   2427       unsigned PerStoreOffset =
   2428           TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
   2429 
   2430       for (unsigned i = 0; i < NumElts; i += VecSize) {
   2431         // Get values
   2432         SDValue StoreVal;
   2433         SmallVector<SDValue, 8> Ops;
   2434         Ops.push_back(Chain);
   2435         Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
   2436         unsigned Opc = NVPTXISD::StoreRetvalV2;
   2437         EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
   2438 
   2439         StoreVal = OutVals[i];
   2440         if (NeedExtend)
   2441           StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
   2442         Ops.push_back(StoreVal);
   2443 
   2444         if (i + 1 < NumElts) {
   2445           StoreVal = OutVals[i + 1];
   2446           if (NeedExtend)
   2447             StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
   2448         } else {
   2449           StoreVal = DAG.getUNDEF(ExtendedVT);
   2450         }
   2451         Ops.push_back(StoreVal);
   2452 
   2453         if (VecSize == 4) {
   2454           Opc = NVPTXISD::StoreRetvalV4;
   2455           if (i + 2 < NumElts) {
   2456             StoreVal = OutVals[i + 2];
   2457             if (NeedExtend)
   2458               StoreVal =
   2459                   DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
   2460           } else {
   2461             StoreVal = DAG.getUNDEF(ExtendedVT);
   2462           }
   2463           Ops.push_back(StoreVal);
   2464 
   2465           if (i + 3 < NumElts) {
   2466             StoreVal = OutVals[i + 3];
   2467             if (NeedExtend)
   2468               StoreVal =
   2469                   DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
   2470           } else {
   2471             StoreVal = DAG.getUNDEF(ExtendedVT);
   2472           }
   2473           Ops.push_back(StoreVal);
   2474         }
   2475 
   2477         Chain =
   2478             DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
   2479                                     EltVT, MachinePointerInfo());
   2480         Offset += PerStoreOffset;
   2481       }
   2482     }
   2483   } else {
   2484     SmallVector<EVT, 16> ValVTs;
   2485     SmallVector<uint64_t, 16> Offsets;
   2486     ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
   2487     assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
   2488 
   2489     for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
   2490       SDValue theVal = OutVals[i];
   2491       EVT TheValType = theVal.getValueType();
   2492       unsigned numElems = 1;
   2493       if (TheValType.isVector())
   2494         numElems = TheValType.getVectorNumElements();
   2495       for (unsigned j = 0, je = numElems; j != je; ++j) {
   2496         SDValue TmpVal = theVal;
   2497         if (TheValType.isVector())
   2498           TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
   2499                                TheValType.getVectorElementType(), TmpVal,
   2500                                DAG.getIntPtrConstant(j, dl));
   2501         EVT TheStoreType = ValVTs[i];
   2502         if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
   2503           // The following zero-extension is for integer types only, and
   2504           // specifically not for aggregates.
   2505           TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
   2506           TheStoreType = MVT::i32;
   2507         }
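          // (ABI sketch: an integer return narrower than 32 bits is assumed
          // to be declared as '.param .b32 func_retval0', hence the widening.)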
    2508         } else if (TmpVal.getValueType().getSizeInBits() < 16)
   2510 
   2511         SDValue Ops[] = {
   2512           Chain,
   2513           DAG.getConstant(Offsets[i], dl, MVT::i32),
   2514           TmpVal };
   2515         Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
   2516                                         DAG.getVTList(MVT::Other), Ops,
   2517                                         TheStoreType,
   2518                                         MachinePointerInfo());
   2519       }
   2520     }
   2521   }
   2522 
   2523   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
   2524 }
    2525
   2527 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
   2528     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
   2529     SelectionDAG &DAG) const {
    2530   if (Constraint.length() > 1)
    2531     return;
    2532   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
   2534 }
   2535 
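// Maps an NVVM texture/tld4 intrinsic ID to the matching NVPTXISD texture
// opcode; returns 0 when the intrinsic is not a texture read.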
   2536 static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
   2537   switch (Intrinsic) {
   2538   default:
   2539     return 0;
   2540 
   2541   case Intrinsic::nvvm_tex_1d_v4f32_s32:
   2542     return NVPTXISD::Tex1DFloatS32;
   2543   case Intrinsic::nvvm_tex_1d_v4f32_f32:
   2544     return NVPTXISD::Tex1DFloatFloat;
   2545   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
   2546     return NVPTXISD::Tex1DFloatFloatLevel;
   2547   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
   2548     return NVPTXISD::Tex1DFloatFloatGrad;
   2549   case Intrinsic::nvvm_tex_1d_v4s32_s32:
   2550     return NVPTXISD::Tex1DS32S32;
   2551   case Intrinsic::nvvm_tex_1d_v4s32_f32:
   2552     return NVPTXISD::Tex1DS32Float;
   2553   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
   2554     return NVPTXISD::Tex1DS32FloatLevel;
   2555   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
   2556     return NVPTXISD::Tex1DS32FloatGrad;
   2557   case Intrinsic::nvvm_tex_1d_v4u32_s32:
   2558     return NVPTXISD::Tex1DU32S32;
   2559   case Intrinsic::nvvm_tex_1d_v4u32_f32:
   2560     return NVPTXISD::Tex1DU32Float;
   2561   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
   2562     return NVPTXISD::Tex1DU32FloatLevel;
   2563   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
   2564     return NVPTXISD::Tex1DU32FloatGrad;
   2565 
   2566   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
   2567     return NVPTXISD::Tex1DArrayFloatS32;
   2568   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
   2569     return NVPTXISD::Tex1DArrayFloatFloat;
   2570   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
   2571     return NVPTXISD::Tex1DArrayFloatFloatLevel;
   2572   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
   2573     return NVPTXISD::Tex1DArrayFloatFloatGrad;
   2574   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
   2575     return NVPTXISD::Tex1DArrayS32S32;
   2576   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
   2577     return NVPTXISD::Tex1DArrayS32Float;
   2578   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
   2579     return NVPTXISD::Tex1DArrayS32FloatLevel;
   2580   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
   2581     return NVPTXISD::Tex1DArrayS32FloatGrad;
   2582   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
   2583     return NVPTXISD::Tex1DArrayU32S32;
   2584   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
   2585     return NVPTXISD::Tex1DArrayU32Float;
   2586   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
   2587     return NVPTXISD::Tex1DArrayU32FloatLevel;
   2588   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
   2589     return NVPTXISD::Tex1DArrayU32FloatGrad;
   2590 
   2591   case Intrinsic::nvvm_tex_2d_v4f32_s32:
   2592     return NVPTXISD::Tex2DFloatS32;
   2593   case Intrinsic::nvvm_tex_2d_v4f32_f32:
   2594     return NVPTXISD::Tex2DFloatFloat;
   2595   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
   2596     return NVPTXISD::Tex2DFloatFloatLevel;
   2597   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
   2598     return NVPTXISD::Tex2DFloatFloatGrad;
   2599   case Intrinsic::nvvm_tex_2d_v4s32_s32:
   2600     return NVPTXISD::Tex2DS32S32;
   2601   case Intrinsic::nvvm_tex_2d_v4s32_f32:
   2602     return NVPTXISD::Tex2DS32Float;
   2603   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
   2604     return NVPTXISD::Tex2DS32FloatLevel;
   2605   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
   2606     return NVPTXISD::Tex2DS32FloatGrad;
   2607   case Intrinsic::nvvm_tex_2d_v4u32_s32:
   2608     return NVPTXISD::Tex2DU32S32;
   2609   case Intrinsic::nvvm_tex_2d_v4u32_f32:
   2610     return NVPTXISD::Tex2DU32Float;
   2611   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
   2612     return NVPTXISD::Tex2DU32FloatLevel;
   2613   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
   2614     return NVPTXISD::Tex2DU32FloatGrad;
   2615 
   2616   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
   2617     return NVPTXISD::Tex2DArrayFloatS32;
   2618   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
   2619     return NVPTXISD::Tex2DArrayFloatFloat;
   2620   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
   2621     return NVPTXISD::Tex2DArrayFloatFloatLevel;
   2622   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
   2623     return NVPTXISD::Tex2DArrayFloatFloatGrad;
   2624   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
   2625     return NVPTXISD::Tex2DArrayS32S32;
   2626   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
   2627     return NVPTXISD::Tex2DArrayS32Float;
   2628   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
   2629     return NVPTXISD::Tex2DArrayS32FloatLevel;
   2630   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
   2631     return NVPTXISD::Tex2DArrayS32FloatGrad;
   2632   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
   2633     return NVPTXISD::Tex2DArrayU32S32;
   2634   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
   2635     return NVPTXISD::Tex2DArrayU32Float;
   2636   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
   2637     return NVPTXISD::Tex2DArrayU32FloatLevel;
   2638   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
   2639     return NVPTXISD::Tex2DArrayU32FloatGrad;
   2640 
   2641   case Intrinsic::nvvm_tex_3d_v4f32_s32:
   2642     return NVPTXISD::Tex3DFloatS32;
   2643   case Intrinsic::nvvm_tex_3d_v4f32_f32:
   2644     return NVPTXISD::Tex3DFloatFloat;
   2645   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
   2646     return NVPTXISD::Tex3DFloatFloatLevel;
   2647   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
   2648     return NVPTXISD::Tex3DFloatFloatGrad;
   2649   case Intrinsic::nvvm_tex_3d_v4s32_s32:
   2650     return NVPTXISD::Tex3DS32S32;
   2651   case Intrinsic::nvvm_tex_3d_v4s32_f32:
   2652     return NVPTXISD::Tex3DS32Float;
   2653   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
   2654     return NVPTXISD::Tex3DS32FloatLevel;
   2655   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
   2656     return NVPTXISD::Tex3DS32FloatGrad;
   2657   case Intrinsic::nvvm_tex_3d_v4u32_s32:
   2658     return NVPTXISD::Tex3DU32S32;
   2659   case Intrinsic::nvvm_tex_3d_v4u32_f32:
   2660     return NVPTXISD::Tex3DU32Float;
   2661   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
   2662     return NVPTXISD::Tex3DU32FloatLevel;
   2663   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
   2664     return NVPTXISD::Tex3DU32FloatGrad;
   2665 
   2666   case Intrinsic::nvvm_tex_cube_v4f32_f32:
   2667     return NVPTXISD::TexCubeFloatFloat;
   2668   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
   2669     return NVPTXISD::TexCubeFloatFloatLevel;
   2670   case Intrinsic::nvvm_tex_cube_v4s32_f32:
   2671     return NVPTXISD::TexCubeS32Float;
   2672   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
   2673     return NVPTXISD::TexCubeS32FloatLevel;
   2674   case Intrinsic::nvvm_tex_cube_v4u32_f32:
   2675     return NVPTXISD::TexCubeU32Float;
   2676   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
   2677     return NVPTXISD::TexCubeU32FloatLevel;
   2678 
   2679   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
   2680     return NVPTXISD::TexCubeArrayFloatFloat;
   2681   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
   2682     return NVPTXISD::TexCubeArrayFloatFloatLevel;
   2683   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
   2684     return NVPTXISD::TexCubeArrayS32Float;
   2685   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
   2686     return NVPTXISD::TexCubeArrayS32FloatLevel;
   2687   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
   2688     return NVPTXISD::TexCubeArrayU32Float;
   2689   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
   2690     return NVPTXISD::TexCubeArrayU32FloatLevel;
   2691 
   2692   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
   2693     return NVPTXISD::Tld4R2DFloatFloat;
   2694   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
   2695     return NVPTXISD::Tld4G2DFloatFloat;
   2696   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
   2697     return NVPTXISD::Tld4B2DFloatFloat;
   2698   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
   2699     return NVPTXISD::Tld4A2DFloatFloat;
   2700   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
   2701     return NVPTXISD::Tld4R2DS64Float;
   2702   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
   2703     return NVPTXISD::Tld4G2DS64Float;
   2704   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
   2705     return NVPTXISD::Tld4B2DS64Float;
   2706   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
   2707     return NVPTXISD::Tld4A2DS64Float;
   2708   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
   2709     return NVPTXISD::Tld4R2DU64Float;
   2710   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
   2711     return NVPTXISD::Tld4G2DU64Float;
   2712   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
   2713     return NVPTXISD::Tld4B2DU64Float;
   2714   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
   2715     return NVPTXISD::Tld4A2DU64Float;
   2716 
   2717   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
   2718     return NVPTXISD::TexUnified1DFloatS32;
   2719   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
   2720     return NVPTXISD::TexUnified1DFloatFloat;
   2721   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
   2722     return NVPTXISD::TexUnified1DFloatFloatLevel;
   2723   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
   2724     return NVPTXISD::TexUnified1DFloatFloatGrad;
   2725   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
   2726     return NVPTXISD::TexUnified1DS32S32;
   2727   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
   2728     return NVPTXISD::TexUnified1DS32Float;
   2729   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
   2730     return NVPTXISD::TexUnified1DS32FloatLevel;
   2731   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
   2732     return NVPTXISD::TexUnified1DS32FloatGrad;
   2733   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
   2734     return NVPTXISD::TexUnified1DU32S32;
   2735   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
   2736     return NVPTXISD::TexUnified1DU32Float;
   2737   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
   2738     return NVPTXISD::TexUnified1DU32FloatLevel;
   2739   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
   2740     return NVPTXISD::TexUnified1DU32FloatGrad;
   2741 
   2742   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
   2743     return NVPTXISD::TexUnified1DArrayFloatS32;
   2744   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
   2745     return NVPTXISD::TexUnified1DArrayFloatFloat;
   2746   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
   2747     return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
   2748   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
   2749     return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
   2750   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
   2751     return NVPTXISD::TexUnified1DArrayS32S32;
   2752   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
   2753     return NVPTXISD::TexUnified1DArrayS32Float;
   2754   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
   2755     return NVPTXISD::TexUnified1DArrayS32FloatLevel;
   2756   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
   2757     return NVPTXISD::TexUnified1DArrayS32FloatGrad;
   2758   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
   2759     return NVPTXISD::TexUnified1DArrayU32S32;
   2760   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
   2761     return NVPTXISD::TexUnified1DArrayU32Float;
   2762   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
   2763     return NVPTXISD::TexUnified1DArrayU32FloatLevel;
   2764   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
   2765     return NVPTXISD::TexUnified1DArrayU32FloatGrad;
   2766 
   2767   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
   2768     return NVPTXISD::TexUnified2DFloatS32;
   2769   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
   2770     return NVPTXISD::TexUnified2DFloatFloat;
   2771   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
   2772     return NVPTXISD::TexUnified2DFloatFloatLevel;
   2773   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
   2774     return NVPTXISD::TexUnified2DFloatFloatGrad;
   2775   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
   2776     return NVPTXISD::TexUnified2DS32S32;
   2777   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
   2778     return NVPTXISD::TexUnified2DS32Float;
   2779   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
   2780     return NVPTXISD::TexUnified2DS32FloatLevel;
   2781   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
   2782     return NVPTXISD::TexUnified2DS32FloatGrad;
   2783   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
   2784     return NVPTXISD::TexUnified2DU32S32;
   2785   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
   2786     return NVPTXISD::TexUnified2DU32Float;
   2787   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
   2788     return NVPTXISD::TexUnified2DU32FloatLevel;
   2789   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
   2790     return NVPTXISD::TexUnified2DU32FloatGrad;
   2791 
   2792   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
   2793     return NVPTXISD::TexUnified2DArrayFloatS32;
   2794   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
   2795     return NVPTXISD::TexUnified2DArrayFloatFloat;
   2796   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
   2797     return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
   2798   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
   2799     return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
   2800   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
   2801     return NVPTXISD::TexUnified2DArrayS32S32;
   2802   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
   2803     return NVPTXISD::TexUnified2DArrayS32Float;
   2804   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
   2805     return NVPTXISD::TexUnified2DArrayS32FloatLevel;
   2806   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
   2807     return NVPTXISD::TexUnified2DArrayS32FloatGrad;
   2808   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
   2809     return NVPTXISD::TexUnified2DArrayU32S32;
   2810   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
   2811     return NVPTXISD::TexUnified2DArrayU32Float;
   2812   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
   2813     return NVPTXISD::TexUnified2DArrayU32FloatLevel;
   2814   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
   2815     return NVPTXISD::TexUnified2DArrayU32FloatGrad;
   2816 
   2817   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
   2818     return NVPTXISD::TexUnified3DFloatS32;
   2819   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
   2820     return NVPTXISD::TexUnified3DFloatFloat;
   2821   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
   2822     return NVPTXISD::TexUnified3DFloatFloatLevel;
   2823   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
   2824     return NVPTXISD::TexUnified3DFloatFloatGrad;
   2825   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
   2826     return NVPTXISD::TexUnified3DS32S32;
   2827   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
   2828     return NVPTXISD::TexUnified3DS32Float;
   2829   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
   2830     return NVPTXISD::TexUnified3DS32FloatLevel;
   2831   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
   2832     return NVPTXISD::TexUnified3DS32FloatGrad;
   2833   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
   2834     return NVPTXISD::TexUnified3DU32S32;
   2835   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
   2836     return NVPTXISD::TexUnified3DU32Float;
   2837   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
   2838     return NVPTXISD::TexUnified3DU32FloatLevel;
   2839   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
   2840     return NVPTXISD::TexUnified3DU32FloatGrad;
   2841 
   2842   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
   2843     return NVPTXISD::TexUnifiedCubeFloatFloat;
   2844   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
   2845     return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
   2846   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
   2847     return NVPTXISD::TexUnifiedCubeS32Float;
   2848   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
   2849     return NVPTXISD::TexUnifiedCubeS32FloatLevel;
   2850   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
   2851     return NVPTXISD::TexUnifiedCubeU32Float;
   2852   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
   2853     return NVPTXISD::TexUnifiedCubeU32FloatLevel;
   2854 
   2855   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
   2856     return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
   2857   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
   2858     return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
   2859   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
   2860     return NVPTXISD::TexUnifiedCubeArrayS32Float;
   2861   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
   2862     return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
   2863   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
   2864     return NVPTXISD::TexUnifiedCubeArrayU32Float;
   2865   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
   2866     return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
   2867 
   2868   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
   2869     return NVPTXISD::Tld4UnifiedR2DFloatFloat;
   2870   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
   2871     return NVPTXISD::Tld4UnifiedG2DFloatFloat;
   2872   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
   2873     return NVPTXISD::Tld4UnifiedB2DFloatFloat;
   2874   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
   2875     return NVPTXISD::Tld4UnifiedA2DFloatFloat;
   2876   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
   2877     return NVPTXISD::Tld4UnifiedR2DS64Float;
   2878   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
   2879     return NVPTXISD::Tld4UnifiedG2DS64Float;
   2880   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
   2881     return NVPTXISD::Tld4UnifiedB2DS64Float;
   2882   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
   2883     return NVPTXISD::Tld4UnifiedA2DS64Float;
   2884   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
   2885     return NVPTXISD::Tld4UnifiedR2DU64Float;
   2886   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
   2887     return NVPTXISD::Tld4UnifiedG2DU64Float;
   2888   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
   2889     return NVPTXISD::Tld4UnifiedB2DU64Float;
   2890   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
   2891     return NVPTXISD::Tld4UnifiedA2DU64Float;
   2892   }
   2893 }
   2894 
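// Maps an NVVM surface-load (suld) intrinsic ID to the matching NVPTXISD
// opcode; returns 0 when the intrinsic is not a surface load.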
   2895 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
   2896   switch (Intrinsic) {
   2897   default:
   2898     return 0;
   2899   case Intrinsic::nvvm_suld_1d_i8_clamp:
   2900     return NVPTXISD::Suld1DI8Clamp;
   2901   case Intrinsic::nvvm_suld_1d_i16_clamp:
   2902     return NVPTXISD::Suld1DI16Clamp;
   2903   case Intrinsic::nvvm_suld_1d_i32_clamp:
   2904     return NVPTXISD::Suld1DI32Clamp;
   2905   case Intrinsic::nvvm_suld_1d_i64_clamp:
   2906     return NVPTXISD::Suld1DI64Clamp;
   2907   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
   2908     return NVPTXISD::Suld1DV2I8Clamp;
   2909   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
   2910     return NVPTXISD::Suld1DV2I16Clamp;
   2911   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
   2912     return NVPTXISD::Suld1DV2I32Clamp;
   2913   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
   2914     return NVPTXISD::Suld1DV2I64Clamp;
   2915   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
   2916     return NVPTXISD::Suld1DV4I8Clamp;
   2917   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
   2918     return NVPTXISD::Suld1DV4I16Clamp;
   2919   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
   2920     return NVPTXISD::Suld1DV4I32Clamp;
   2921   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
   2922     return NVPTXISD::Suld1DArrayI8Clamp;
   2923   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
   2924     return NVPTXISD::Suld1DArrayI16Clamp;
   2925   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
   2926     return NVPTXISD::Suld1DArrayI32Clamp;
   2927   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
   2928     return NVPTXISD::Suld1DArrayI64Clamp;
   2929   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
   2930     return NVPTXISD::Suld1DArrayV2I8Clamp;
   2931   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
   2932     return NVPTXISD::Suld1DArrayV2I16Clamp;
   2933   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
   2934     return NVPTXISD::Suld1DArrayV2I32Clamp;
   2935   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
   2936     return NVPTXISD::Suld1DArrayV2I64Clamp;
   2937   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
   2938     return NVPTXISD::Suld1DArrayV4I8Clamp;
   2939   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
   2940     return NVPTXISD::Suld1DArrayV4I16Clamp;
   2941   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
   2942     return NVPTXISD::Suld1DArrayV4I32Clamp;
   2943   case Intrinsic::nvvm_suld_2d_i8_clamp:
   2944     return NVPTXISD::Suld2DI8Clamp;
   2945   case Intrinsic::nvvm_suld_2d_i16_clamp:
   2946     return NVPTXISD::Suld2DI16Clamp;
   2947   case Intrinsic::nvvm_suld_2d_i32_clamp:
   2948     return NVPTXISD::Suld2DI32Clamp;
   2949   case Intrinsic::nvvm_suld_2d_i64_clamp:
   2950     return NVPTXISD::Suld2DI64Clamp;
   2951   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
   2952     return NVPTXISD::Suld2DV2I8Clamp;
   2953   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
   2954     return NVPTXISD::Suld2DV2I16Clamp;
   2955   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
   2956     return NVPTXISD::Suld2DV2I32Clamp;
   2957   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
   2958     return NVPTXISD::Suld2DV2I64Clamp;
   2959   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
   2960     return NVPTXISD::Suld2DV4I8Clamp;
   2961   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
   2962     return NVPTXISD::Suld2DV4I16Clamp;
   2963   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
   2964     return NVPTXISD::Suld2DV4I32Clamp;
   2965   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
   2966     return NVPTXISD::Suld2DArrayI8Clamp;
   2967   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
   2968     return NVPTXISD::Suld2DArrayI16Clamp;
   2969   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
   2970     return NVPTXISD::Suld2DArrayI32Clamp;
   2971   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
   2972     return NVPTXISD::Suld2DArrayI64Clamp;
   2973   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
   2974     return NVPTXISD::Suld2DArrayV2I8Clamp;
   2975   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
   2976     return NVPTXISD::Suld2DArrayV2I16Clamp;
   2977   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
   2978     return NVPTXISD::Suld2DArrayV2I32Clamp;
   2979   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
   2980     return NVPTXISD::Suld2DArrayV2I64Clamp;
   2981   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
   2982     return NVPTXISD::Suld2DArrayV4I8Clamp;
   2983   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
   2984     return NVPTXISD::Suld2DArrayV4I16Clamp;
   2985   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
   2986     return NVPTXISD::Suld2DArrayV4I32Clamp;
   2987   case Intrinsic::nvvm_suld_3d_i8_clamp:
   2988     return NVPTXISD::Suld3DI8Clamp;
   2989   case Intrinsic::nvvm_suld_3d_i16_clamp:
   2990     return NVPTXISD::Suld3DI16Clamp;
   2991   case Intrinsic::nvvm_suld_3d_i32_clamp:
   2992     return NVPTXISD::Suld3DI32Clamp;
   2993   case Intrinsic::nvvm_suld_3d_i64_clamp:
   2994     return NVPTXISD::Suld3DI64Clamp;
   2995   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
   2996     return NVPTXISD::Suld3DV2I8Clamp;
   2997   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
   2998     return NVPTXISD::Suld3DV2I16Clamp;
   2999   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
   3000     return NVPTXISD::Suld3DV2I32Clamp;
   3001   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
   3002     return NVPTXISD::Suld3DV2I64Clamp;
   3003   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
   3004     return NVPTXISD::Suld3DV4I8Clamp;
   3005   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
   3006     return NVPTXISD::Suld3DV4I16Clamp;
   3007   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
   3008     return NVPTXISD::Suld3DV4I32Clamp;
   3009   case Intrinsic::nvvm_suld_1d_i8_trap:
   3010     return NVPTXISD::Suld1DI8Trap;
   3011   case Intrinsic::nvvm_suld_1d_i16_trap:
   3012     return NVPTXISD::Suld1DI16Trap;
   3013   case Intrinsic::nvvm_suld_1d_i32_trap:
   3014     return NVPTXISD::Suld1DI32Trap;
   3015   case Intrinsic::nvvm_suld_1d_i64_trap:
   3016     return NVPTXISD::Suld1DI64Trap;
   3017   case Intrinsic::nvvm_suld_1d_v2i8_trap:
   3018     return NVPTXISD::Suld1DV2I8Trap;
   3019   case Intrinsic::nvvm_suld_1d_v2i16_trap:
   3020     return NVPTXISD::Suld1DV2I16Trap;
   3021   case Intrinsic::nvvm_suld_1d_v2i32_trap:
   3022     return NVPTXISD::Suld1DV2I32Trap;
   3023   case Intrinsic::nvvm_suld_1d_v2i64_trap:
   3024     return NVPTXISD::Suld1DV2I64Trap;
   3025   case Intrinsic::nvvm_suld_1d_v4i8_trap:
   3026     return NVPTXISD::Suld1DV4I8Trap;
   3027   case Intrinsic::nvvm_suld_1d_v4i16_trap:
   3028     return NVPTXISD::Suld1DV4I16Trap;
   3029   case Intrinsic::nvvm_suld_1d_v4i32_trap:
   3030     return NVPTXISD::Suld1DV4I32Trap;
   3031   case Intrinsic::nvvm_suld_1d_array_i8_trap:
   3032     return NVPTXISD::Suld1DArrayI8Trap;
   3033   case Intrinsic::nvvm_suld_1d_array_i16_trap:
   3034     return NVPTXISD::Suld1DArrayI16Trap;
   3035   case Intrinsic::nvvm_suld_1d_array_i32_trap:
   3036     return NVPTXISD::Suld1DArrayI32Trap;
   3037   case Intrinsic::nvvm_suld_1d_array_i64_trap:
   3038     return NVPTXISD::Suld1DArrayI64Trap;
   3039   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
   3040     return NVPTXISD::Suld1DArrayV2I8Trap;
   3041   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
   3042     return NVPTXISD::Suld1DArrayV2I16Trap;
   3043   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
   3044     return NVPTXISD::Suld1DArrayV2I32Trap;
   3045   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
   3046     return NVPTXISD::Suld1DArrayV2I64Trap;
   3047   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
   3048     return NVPTXISD::Suld1DArrayV4I8Trap;
   3049   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
   3050     return NVPTXISD::Suld1DArrayV4I16Trap;
   3051   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
   3052     return NVPTXISD::Suld1DArrayV4I32Trap;
   3053   case Intrinsic::nvvm_suld_2d_i8_trap:
   3054     return NVPTXISD::Suld2DI8Trap;
   3055   case Intrinsic::nvvm_suld_2d_i16_trap:
   3056     return NVPTXISD::Suld2DI16Trap;
   3057   case Intrinsic::nvvm_suld_2d_i32_trap:
   3058     return NVPTXISD::Suld2DI32Trap;
   3059   case Intrinsic::nvvm_suld_2d_i64_trap:
   3060     return NVPTXISD::Suld2DI64Trap;
   3061   case Intrinsic::nvvm_suld_2d_v2i8_trap:
   3062     return NVPTXISD::Suld2DV2I8Trap;
   3063   case Intrinsic::nvvm_suld_2d_v2i16_trap:
   3064     return NVPTXISD::Suld2DV2I16Trap;
   3065   case Intrinsic::nvvm_suld_2d_v2i32_trap:
   3066     return NVPTXISD::Suld2DV2I32Trap;
   3067   case Intrinsic::nvvm_suld_2d_v2i64_trap:
   3068     return NVPTXISD::Suld2DV2I64Trap;
   3069   case Intrinsic::nvvm_suld_2d_v4i8_trap:
   3070     return NVPTXISD::Suld2DV4I8Trap;
   3071   case Intrinsic::nvvm_suld_2d_v4i16_trap:
   3072     return NVPTXISD::Suld2DV4I16Trap;
   3073   case Intrinsic::nvvm_suld_2d_v4i32_trap:
   3074     return NVPTXISD::Suld2DV4I32Trap;
   3075   case Intrinsic::nvvm_suld_2d_array_i8_trap:
   3076     return NVPTXISD::Suld2DArrayI8Trap;
   3077   case Intrinsic::nvvm_suld_2d_array_i16_trap:
   3078     return NVPTXISD::Suld2DArrayI16Trap;
   3079   case Intrinsic::nvvm_suld_2d_array_i32_trap:
   3080     return NVPTXISD::Suld2DArrayI32Trap;
   3081   case Intrinsic::nvvm_suld_2d_array_i64_trap:
   3082     return NVPTXISD::Suld2DArrayI64Trap;
   3083   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
   3084     return NVPTXISD::Suld2DArrayV2I8Trap;
   3085   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
   3086     return NVPTXISD::Suld2DArrayV2I16Trap;
   3087   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
   3088     return NVPTXISD::Suld2DArrayV2I32Trap;
   3089   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
   3090     return NVPTXISD::Suld2DArrayV2I64Trap;
   3091   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
   3092     return NVPTXISD::Suld2DArrayV4I8Trap;
   3093   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
   3094     return NVPTXISD::Suld2DArrayV4I16Trap;
   3095   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
   3096     return NVPTXISD::Suld2DArrayV4I32Trap;
   3097   case Intrinsic::nvvm_suld_3d_i8_trap:
   3098     return NVPTXISD::Suld3DI8Trap;
   3099   case Intrinsic::nvvm_suld_3d_i16_trap:
   3100     return NVPTXISD::Suld3DI16Trap;
   3101   case Intrinsic::nvvm_suld_3d_i32_trap:
   3102     return NVPTXISD::Suld3DI32Trap;
   3103   case Intrinsic::nvvm_suld_3d_i64_trap:
   3104     return NVPTXISD::Suld3DI64Trap;
   3105   case Intrinsic::nvvm_suld_3d_v2i8_trap:
   3106     return NVPTXISD::Suld3DV2I8Trap;
   3107   case Intrinsic::nvvm_suld_3d_v2i16_trap:
   3108     return NVPTXISD::Suld3DV2I16Trap;
   3109   case Intrinsic::nvvm_suld_3d_v2i32_trap:
   3110     return NVPTXISD::Suld3DV2I32Trap;
   3111   case Intrinsic::nvvm_suld_3d_v2i64_trap:
   3112     return NVPTXISD::Suld3DV2I64Trap;
   3113   case Intrinsic::nvvm_suld_3d_v4i8_trap:
   3114     return NVPTXISD::Suld3DV4I8Trap;
   3115   case Intrinsic::nvvm_suld_3d_v4i16_trap:
   3116     return NVPTXISD::Suld3DV4I16Trap;
   3117   case Intrinsic::nvvm_suld_3d_v4i32_trap:
   3118     return NVPTXISD::Suld3DV4I32Trap;
   3119   case Intrinsic::nvvm_suld_1d_i8_zero:
   3120     return NVPTXISD::Suld1DI8Zero;
   3121   case Intrinsic::nvvm_suld_1d_i16_zero:
   3122     return NVPTXISD::Suld1DI16Zero;
   3123   case Intrinsic::nvvm_suld_1d_i32_zero:
   3124     return NVPTXISD::Suld1DI32Zero;
   3125   case Intrinsic::nvvm_suld_1d_i64_zero:
   3126     return NVPTXISD::Suld1DI64Zero;
   3127   case Intrinsic::nvvm_suld_1d_v2i8_zero:
   3128     return NVPTXISD::Suld1DV2I8Zero;
   3129   case Intrinsic::nvvm_suld_1d_v2i16_zero:
   3130     return NVPTXISD::Suld1DV2I16Zero;
   3131   case Intrinsic::nvvm_suld_1d_v2i32_zero:
   3132     return NVPTXISD::Suld1DV2I32Zero;
   3133   case Intrinsic::nvvm_suld_1d_v2i64_zero:
   3134     return NVPTXISD::Suld1DV2I64Zero;
   3135   case Intrinsic::nvvm_suld_1d_v4i8_zero:
   3136     return NVPTXISD::Suld1DV4I8Zero;
   3137   case Intrinsic::nvvm_suld_1d_v4i16_zero:
   3138     return NVPTXISD::Suld1DV4I16Zero;
   3139   case Intrinsic::nvvm_suld_1d_v4i32_zero:
   3140     return NVPTXISD::Suld1DV4I32Zero;
   3141   case Intrinsic::nvvm_suld_1d_array_i8_zero:
   3142     return NVPTXISD::Suld1DArrayI8Zero;
   3143   case Intrinsic::nvvm_suld_1d_array_i16_zero:
   3144     return NVPTXISD::Suld1DArrayI16Zero;
   3145   case Intrinsic::nvvm_suld_1d_array_i32_zero:
   3146     return NVPTXISD::Suld1DArrayI32Zero;
   3147   case Intrinsic::nvvm_suld_1d_array_i64_zero:
   3148     return NVPTXISD::Suld1DArrayI64Zero;
   3149   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
   3150     return NVPTXISD::Suld1DArrayV2I8Zero;
   3151   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
   3152     return NVPTXISD::Suld1DArrayV2I16Zero;
   3153   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
   3154     return NVPTXISD::Suld1DArrayV2I32Zero;
   3155   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
   3156     return NVPTXISD::Suld1DArrayV2I64Zero;
   3157   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
   3158     return NVPTXISD::Suld1DArrayV4I8Zero;
   3159   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
   3160     return NVPTXISD::Suld1DArrayV4I16Zero;
   3161   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
   3162     return NVPTXISD::Suld1DArrayV4I32Zero;
   3163   case Intrinsic::nvvm_suld_2d_i8_zero:
   3164     return NVPTXISD::Suld2DI8Zero;
   3165   case Intrinsic::nvvm_suld_2d_i16_zero:
   3166     return NVPTXISD::Suld2DI16Zero;
   3167   case Intrinsic::nvvm_suld_2d_i32_zero:
   3168     return NVPTXISD::Suld2DI32Zero;
   3169   case Intrinsic::nvvm_suld_2d_i64_zero:
   3170     return NVPTXISD::Suld2DI64Zero;
   3171   case Intrinsic::nvvm_suld_2d_v2i8_zero:
   3172     return NVPTXISD::Suld2DV2I8Zero;
   3173   case Intrinsic::nvvm_suld_2d_v2i16_zero:
   3174     return NVPTXISD::Suld2DV2I16Zero;
   3175   case Intrinsic::nvvm_suld_2d_v2i32_zero:
   3176     return NVPTXISD::Suld2DV2I32Zero;
   3177   case Intrinsic::nvvm_suld_2d_v2i64_zero:
   3178     return NVPTXISD::Suld2DV2I64Zero;
   3179   case Intrinsic::nvvm_suld_2d_v4i8_zero:
   3180     return NVPTXISD::Suld2DV4I8Zero;
   3181   case Intrinsic::nvvm_suld_2d_v4i16_zero:
   3182     return NVPTXISD::Suld2DV4I16Zero;
   3183   case Intrinsic::nvvm_suld_2d_v4i32_zero:
   3184     return NVPTXISD::Suld2DV4I32Zero;
   3185   case Intrinsic::nvvm_suld_2d_array_i8_zero:
   3186     return NVPTXISD::Suld2DArrayI8Zero;
   3187   case Intrinsic::nvvm_suld_2d_array_i16_zero:
   3188     return NVPTXISD::Suld2DArrayI16Zero;
   3189   case Intrinsic::nvvm_suld_2d_array_i32_zero:
   3190     return NVPTXISD::Suld2DArrayI32Zero;
   3191   case Intrinsic::nvvm_suld_2d_array_i64_zero:
   3192     return NVPTXISD::Suld2DArrayI64Zero;
   3193   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
   3194     return NVPTXISD::Suld2DArrayV2I8Zero;
   3195   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
   3196     return NVPTXISD::Suld2DArrayV2I16Zero;
   3197   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
   3198     return NVPTXISD::Suld2DArrayV2I32Zero;
   3199   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
   3200     return NVPTXISD::Suld2DArrayV2I64Zero;
   3201   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
   3202     return NVPTXISD::Suld2DArrayV4I8Zero;
   3203   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
   3204     return NVPTXISD::Suld2DArrayV4I16Zero;
   3205   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
   3206     return NVPTXISD::Suld2DArrayV4I32Zero;
   3207   case Intrinsic::nvvm_suld_3d_i8_zero:
   3208     return NVPTXISD::Suld3DI8Zero;
   3209   case Intrinsic::nvvm_suld_3d_i16_zero:
   3210     return NVPTXISD::Suld3DI16Zero;
   3211   case Intrinsic::nvvm_suld_3d_i32_zero:
   3212     return NVPTXISD::Suld3DI32Zero;
   3213   case Intrinsic::nvvm_suld_3d_i64_zero:
   3214     return NVPTXISD::Suld3DI64Zero;
   3215   case Intrinsic::nvvm_suld_3d_v2i8_zero:
   3216     return NVPTXISD::Suld3DV2I8Zero;
   3217   case Intrinsic::nvvm_suld_3d_v2i16_zero:
   3218     return NVPTXISD::Suld3DV2I16Zero;
   3219   case Intrinsic::nvvm_suld_3d_v2i32_zero:
   3220     return NVPTXISD::Suld3DV2I32Zero;
   3221   case Intrinsic::nvvm_suld_3d_v2i64_zero:
   3222     return NVPTXISD::Suld3DV2I64Zero;
   3223   case Intrinsic::nvvm_suld_3d_v4i8_zero:
   3224     return NVPTXISD::Suld3DV4I8Zero;
   3225   case Intrinsic::nvvm_suld_3d_v4i16_zero:
   3226     return NVPTXISD::Suld3DV4I16Zero;
   3227   case Intrinsic::nvvm_suld_3d_v4i32_zero:
   3228     return NVPTXISD::Suld3DV4I32Zero;
   3229   }
   3230 }
   3231 
    3232 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
    3233 // TgtMemIntrinsic because we need the information that is only available in
    3234 // the "Value" type of the destination pointer. In particular, the address
    3235 // space information.
   3237 bool NVPTXTargetLowering::getTgtMemIntrinsic(
   3238     IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
   3239   switch (Intrinsic) {
   3240   default:
   3241     return false;
   3242 
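  // Atomic float add; a sketch of the IR form, with assumed names and
  // pointer-overload suffix:
  //   %old = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %p, float %v)
  // Modeled as a memory intrinsic so the pointer's address space is kept.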
   3243   case Intrinsic::nvvm_atomic_load_add_f32:
   3244     Info.opc = ISD::INTRINSIC_W_CHAIN;
   3245     Info.memVT = MVT::f32;
   3246     Info.ptrVal = I.getArgOperand(0);
   3247     Info.offset = 0;
   3248     Info.vol = 0;
   3249     Info.readMem = true;
   3250     Info.writeMem = true;
   3251     Info.align = 0;
   3252     return true;
   3253 
   3254   case Intrinsic::nvvm_atomic_load_inc_32:
   3255   case Intrinsic::nvvm_atomic_load_dec_32:
   3256     Info.opc = ISD::INTRINSIC_W_CHAIN;
   3257     Info.memVT = MVT::i32;
   3258     Info.ptrVal = I.getArgOperand(0);
   3259     Info.offset = 0;
   3260     Info.vol = 0;
   3261     Info.readMem = true;
   3262     Info.writeMem = true;
   3263     Info.align = 0;
   3264     return true;
   3265 
   3266   case Intrinsic::nvvm_ldu_global_i:
   3267   case Intrinsic::nvvm_ldu_global_f:
   3268   case Intrinsic::nvvm_ldu_global_p: {
   3269     auto &DL = I.getModule()->getDataLayout();
   3270     Info.opc = ISD::INTRINSIC_W_CHAIN;
    3271     if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
    3272       Info.memVT = getPointerTy(DL);
    3273     else
    3274       Info.memVT = getValueType(DL, I.getType());
   3277     Info.ptrVal = I.getArgOperand(0);
   3278     Info.offset = 0;
   3279     Info.vol = 0;
   3280     Info.readMem = true;
   3281     Info.writeMem = false;
   3282     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
   3283 
   3284     return true;
   3285   }
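  // Example IR matched above (a sketch; operand 1 is the alignment immediate
  // read into Info.align):
  //   %v = call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %p, i32 4)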
   3286   case Intrinsic::nvvm_ldg_global_i:
   3287   case Intrinsic::nvvm_ldg_global_f:
   3288   case Intrinsic::nvvm_ldg_global_p: {
   3289     auto &DL = I.getModule()->getDataLayout();
   3290 
   3291     Info.opc = ISD::INTRINSIC_W_CHAIN;
    3292     if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
    3293       Info.memVT = getPointerTy(DL);
    3294     else
    3295       Info.memVT = getValueType(DL, I.getType());
   3298     Info.ptrVal = I.getArgOperand(0);
   3299     Info.offset = 0;
   3300     Info.vol = 0;
   3301     Info.readMem = true;
   3302     Info.writeMem = false;
   3303     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
   3304 
   3305     return true;
   3306   }
   3307 
   3308   case Intrinsic::nvvm_tex_1d_v4f32_s32:
   3309   case Intrinsic::nvvm_tex_1d_v4f32_f32:
   3310   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
   3311   case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
   3312   case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
   3313   case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
   3314   case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
   3315   case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
   3316   case Intrinsic::nvvm_tex_2d_v4f32_s32:
   3317   case Intrinsic::nvvm_tex_2d_v4f32_f32:
   3318   case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
   3319   case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
   3320   case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
   3321   case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
   3322   case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
   3323   case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
   3324   case Intrinsic::nvvm_tex_3d_v4f32_s32:
   3325   case Intrinsic::nvvm_tex_3d_v4f32_f32:
   3326   case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
   3327   case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
   3328   case Intrinsic::nvvm_tex_cube_v4f32_f32:
   3329   case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
   3330   case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
   3331   case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
   3332   case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
   3333   case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
   3334   case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
   3335   case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
   3336   case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
   3337   case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
   3338   case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
   3339   case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
   3340   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
   3341   case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
   3342   case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
   3343   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
   3344   case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
   3345   case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
   3346   case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
   3347   case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
   3348   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
   3349   case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
   3350   case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
   3351   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
   3352   case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
   3353   case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
   3354   case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
   3355   case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
   3356   case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
   3357   case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
   3358   case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
   3359   case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
   3360   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
   3361   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
   3362   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
   3363   case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
   3364     Info.opc = getOpcForTextureInstr(Intrinsic);
   3365     Info.memVT = MVT::v4f32;
   3366     Info.ptrVal = nullptr;
   3367     Info.offset = 0;
   3368     Info.vol = 0;
   3369     Info.readMem = true;
   3370     Info.writeMem = false;
   3371     Info.align = 16;
   3372     return true;
   3373   }
   3374   case Intrinsic::nvvm_tex_1d_v4s32_s32:
   3375   case Intrinsic::nvvm_tex_1d_v4s32_f32:
   3376   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
   3377   case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
   3378   case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
   3379   case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
   3380   case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
   3381   case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
   3382   case Intrinsic::nvvm_tex_2d_v4s32_s32:
   3383   case Intrinsic::nvvm_tex_2d_v4s32_f32:
   3384   case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
   3385   case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
   3386   case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
   3387   case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
   3388   case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
   3389   case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
   3390   case Intrinsic::nvvm_tex_3d_v4s32_s32:
   3391   case Intrinsic::nvvm_tex_3d_v4s32_f32:
   3392   case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
   3393   case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
   3394   case Intrinsic::nvvm_tex_cube_v4s32_f32:
   3395   case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
   3396   case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
   3397   case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
   3398   case Intrinsic::nvvm_tex_cube_v4u32_f32:
   3399   case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
   3400   case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
   3401   case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
   3402   case Intrinsic::nvvm_tex_1d_v4u32_s32:
   3403   case Intrinsic::nvvm_tex_1d_v4u32_f32:
   3404   case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
   3405   case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
   3406   case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
   3407   case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
   3408   case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
   3409   case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
   3410   case Intrinsic::nvvm_tex_2d_v4u32_s32:
   3411   case Intrinsic::nvvm_tex_2d_v4u32_f32:
   3412   case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
   3413   case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
   3414   case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
   3415   case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
   3416   case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
   3417   case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
   3418   case Intrinsic::nvvm_tex_3d_v4u32_s32:
   3419   case Intrinsic::nvvm_tex_3d_v4u32_f32:
   3420   case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
   3421   case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
   3422   case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
   3423   case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
   3424   case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
   3425   case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
   3426   case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
   3427   case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
   3428   case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
   3429   case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
   3430   case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
   3431   case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
   3432   case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
   3433   case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
   3434   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
   3435   case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
   3436   case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
   3437   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
   3438   case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
   3439   case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
   3440   case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
   3441   case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
   3442   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
   3443   case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
   3444   case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
   3445   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
   3446   case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
   3447   case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
   3448   case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
   3449   case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
   3450   case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
   3451   case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
   3452   case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
   3453   case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
   3454   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
   3455   case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
   3456   case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
   3457   case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
   3458   case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
   3459   case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
   3460   case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
   3461   case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
   3462   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
   3463   case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
   3464   case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
   3465   case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
   3466   case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
   3467   case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
   3468   case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
   3469   case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
   3470   case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
   3471   case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
   3472   case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
   3473   case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
   3474   case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
   3475   case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
   3476   case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
   3477   case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
   3478   case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
   3479   case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
   3480   case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
   3481   case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
   3482   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
   3483   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
   3484   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
   3485   case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
   3486     Info.opc = getOpcForTextureInstr(Intrinsic);
   3487     Info.memVT = MVT::v4i32;
   3488     Info.ptrVal = nullptr;
   3489     Info.offset = 0;
   3490     Info.vol = 0;
   3491     Info.readMem = true;
   3492     Info.writeMem = false;
   3493     Info.align = 16;
   3494     return true;
   3495   }
   3496   case Intrinsic::nvvm_suld_1d_i8_clamp:
   3497   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
   3498   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
   3499   case Intrinsic::nvvm_suld_1d_array_i8_clamp:
   3500   case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
   3501   case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
   3502   case Intrinsic::nvvm_suld_2d_i8_clamp:
   3503   case Intrinsic::nvvm_suld_2d_v2i8_clamp:
   3504   case Intrinsic::nvvm_suld_2d_v4i8_clamp:
   3505   case Intrinsic::nvvm_suld_2d_array_i8_clamp:
   3506   case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
   3507   case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
   3508   case Intrinsic::nvvm_suld_3d_i8_clamp:
   3509   case Intrinsic::nvvm_suld_3d_v2i8_clamp:
   3510   case Intrinsic::nvvm_suld_3d_v4i8_clamp:
   3511   case Intrinsic::nvvm_suld_1d_i8_trap:
   3512   case Intrinsic::nvvm_suld_1d_v2i8_trap:
   3513   case Intrinsic::nvvm_suld_1d_v4i8_trap:
   3514   case Intrinsic::nvvm_suld_1d_array_i8_trap:
   3515   case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
   3516   case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
   3517   case Intrinsic::nvvm_suld_2d_i8_trap:
   3518   case Intrinsic::nvvm_suld_2d_v2i8_trap:
   3519   case Intrinsic::nvvm_suld_2d_v4i8_trap:
   3520   case Intrinsic::nvvm_suld_2d_array_i8_trap:
   3521   case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
   3522   case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
   3523   case Intrinsic::nvvm_suld_3d_i8_trap:
   3524   case Intrinsic::nvvm_suld_3d_v2i8_trap:
   3525   case Intrinsic::nvvm_suld_3d_v4i8_trap:
   3526   case Intrinsic::nvvm_suld_1d_i8_zero:
   3527   case Intrinsic::nvvm_suld_1d_v2i8_zero:
   3528   case Intrinsic::nvvm_suld_1d_v4i8_zero:
   3529   case Intrinsic::nvvm_suld_1d_array_i8_zero:
   3530   case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
   3531   case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
   3532   case Intrinsic::nvvm_suld_2d_i8_zero:
   3533   case Intrinsic::nvvm_suld_2d_v2i8_zero:
   3534   case Intrinsic::nvvm_suld_2d_v4i8_zero:
   3535   case Intrinsic::nvvm_suld_2d_array_i8_zero:
   3536   case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
   3537   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
   3538   case Intrinsic::nvvm_suld_3d_i8_zero:
   3539   case Intrinsic::nvvm_suld_3d_v2i8_zero:
   3540   case Intrinsic::nvvm_suld_3d_v4i8_zero: {
   3541     Info.opc = getOpcForSurfaceInstr(Intrinsic);
   3542     Info.memVT = MVT::i8;
   3543     Info.ptrVal = nullptr;
   3544     Info.offset = 0;
   3545     Info.vol = 0;
   3546     Info.readMem = true;
   3547     Info.writeMem = false;
   3548     Info.align = 16;
   3549     return true;
   3550   }
   3551   case Intrinsic::nvvm_suld_1d_i16_clamp:
   3552   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
   3553   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
   3554   case Intrinsic::nvvm_suld_1d_array_i16_clamp:
   3555   case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
   3556   case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
   3557   case Intrinsic::nvvm_suld_2d_i16_clamp:
   3558   case Intrinsic::nvvm_suld_2d_v2i16_clamp:
   3559   case Intrinsic::nvvm_suld_2d_v4i16_clamp:
   3560   case Intrinsic::nvvm_suld_2d_array_i16_clamp:
   3561   case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
   3562   case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
   3563   case Intrinsic::nvvm_suld_3d_i16_clamp:
   3564   case Intrinsic::nvvm_suld_3d_v2i16_clamp:
   3565   case Intrinsic::nvvm_suld_3d_v4i16_clamp:
   3566   case Intrinsic::nvvm_suld_1d_i16_trap:
   3567   case Intrinsic::nvvm_suld_1d_v2i16_trap:
   3568   case Intrinsic::nvvm_suld_1d_v4i16_trap:
   3569   case Intrinsic::nvvm_suld_1d_array_i16_trap:
   3570   case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
   3571   case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
   3572   case Intrinsic::nvvm_suld_2d_i16_trap:
   3573   case Intrinsic::nvvm_suld_2d_v2i16_trap:
   3574   case Intrinsic::nvvm_suld_2d_v4i16_trap:
   3575   case Intrinsic::nvvm_suld_2d_array_i16_trap:
   3576   case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
   3577   case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
   3578   case Intrinsic::nvvm_suld_3d_i16_trap:
   3579   case Intrinsic::nvvm_suld_3d_v2i16_trap:
   3580   case Intrinsic::nvvm_suld_3d_v4i16_trap:
   3581   case Intrinsic::nvvm_suld_1d_i16_zero:
   3582   case Intrinsic::nvvm_suld_1d_v2i16_zero:
   3583   case Intrinsic::nvvm_suld_1d_v4i16_zero:
   3584   case Intrinsic::nvvm_suld_1d_array_i16_zero:
   3585   case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
   3586   case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
   3587   case Intrinsic::nvvm_suld_2d_i16_zero:
   3588   case Intrinsic::nvvm_suld_2d_v2i16_zero:
   3589   case Intrinsic::nvvm_suld_2d_v4i16_zero:
   3590   case Intrinsic::nvvm_suld_2d_array_i16_zero:
   3591   case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
   3592   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
   3593   case Intrinsic::nvvm_suld_3d_i16_zero:
   3594   case Intrinsic::nvvm_suld_3d_v2i16_zero:
   3595   case Intrinsic::nvvm_suld_3d_v4i16_zero: {
   3596     Info.opc = getOpcForSurfaceInstr(Intrinsic);
   3597     Info.memVT = MVT::i16;
   3598     Info.ptrVal = nullptr;
   3599     Info.offset = 0;
   3600     Info.vol = 0;
   3601     Info.readMem = true;
   3602     Info.writeMem = false;
   3603     Info.align = 16;
   3604     return true;
   3605   }
   3606   case Intrinsic::nvvm_suld_1d_i32_clamp:
   3607   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
   3608   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
   3609   case Intrinsic::nvvm_suld_1d_array_i32_clamp:
   3610   case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
   3611   case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
   3612   case Intrinsic::nvvm_suld_2d_i32_clamp:
   3613   case Intrinsic::nvvm_suld_2d_v2i32_clamp:
   3614   case Intrinsic::nvvm_suld_2d_v4i32_clamp:
   3615   case Intrinsic::nvvm_suld_2d_array_i32_clamp:
   3616   case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
   3617   case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
   3618   case Intrinsic::nvvm_suld_3d_i32_clamp:
   3619   case Intrinsic::nvvm_suld_3d_v2i32_clamp:
   3620   case Intrinsic::nvvm_suld_3d_v4i32_clamp:
   3621   case Intrinsic::nvvm_suld_1d_i32_trap:
   3622   case Intrinsic::nvvm_suld_1d_v2i32_trap:
   3623   case Intrinsic::nvvm_suld_1d_v4i32_trap:
   3624   case Intrinsic::nvvm_suld_1d_array_i32_trap:
   3625   case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
   3626   case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
   3627   case Intrinsic::nvvm_suld_2d_i32_trap:
   3628   case Intrinsic::nvvm_suld_2d_v2i32_trap:
   3629   case Intrinsic::nvvm_suld_2d_v4i32_trap:
   3630   case Intrinsic::nvvm_suld_2d_array_i32_trap:
   3631   case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
   3632   case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
   3633   case Intrinsic::nvvm_suld_3d_i32_trap:
   3634   case Intrinsic::nvvm_suld_3d_v2i32_trap:
   3635   case Intrinsic::nvvm_suld_3d_v4i32_trap:
   3636   case Intrinsic::nvvm_suld_1d_i32_zero:
   3637   case Intrinsic::nvvm_suld_1d_v2i32_zero:
   3638   case Intrinsic::nvvm_suld_1d_v4i32_zero:
   3639   case Intrinsic::nvvm_suld_1d_array_i32_zero:
   3640   case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
   3641   case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
   3642   case Intrinsic::nvvm_suld_2d_i32_zero:
   3643   case Intrinsic::nvvm_suld_2d_v2i32_zero:
   3644   case Intrinsic::nvvm_suld_2d_v4i32_zero:
   3645   case Intrinsic::nvvm_suld_2d_array_i32_zero:
   3646   case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
   3647   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
   3648   case Intrinsic::nvvm_suld_3d_i32_zero:
   3649   case Intrinsic::nvvm_suld_3d_v2i32_zero:
   3650   case Intrinsic::nvvm_suld_3d_v4i32_zero: {
   3651     Info.opc = getOpcForSurfaceInstr(Intrinsic);
   3652     Info.memVT = MVT::i32;
   3653     Info.ptrVal = nullptr;
   3654     Info.offset = 0;
   3655     Info.vol = 0;
   3656     Info.readMem = true;
   3657     Info.writeMem = false;
   3658     Info.align = 16;
   3659     return true;
   3660   }
   3661   case Intrinsic::nvvm_suld_1d_i64_clamp:
   3662   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
   3663   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
   3664   case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
   3665   case Intrinsic::nvvm_suld_2d_i64_clamp:
   3666   case Intrinsic::nvvm_suld_2d_v2i64_clamp:
   3667   case Intrinsic::nvvm_suld_2d_array_i64_clamp:
   3668   case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
   3669   case Intrinsic::nvvm_suld_3d_i64_clamp:
   3670   case Intrinsic::nvvm_suld_3d_v2i64_clamp:
   3671   case Intrinsic::nvvm_suld_1d_i64_trap:
   3672   case Intrinsic::nvvm_suld_1d_v2i64_trap:
   3673   case Intrinsic::nvvm_suld_1d_array_i64_trap:
   3674   case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
   3675   case Intrinsic::nvvm_suld_2d_i64_trap:
   3676   case Intrinsic::nvvm_suld_2d_v2i64_trap:
   3677   case Intrinsic::nvvm_suld_2d_array_i64_trap:
   3678   case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
   3679   case Intrinsic::nvvm_suld_3d_i64_trap:
   3680   case Intrinsic::nvvm_suld_3d_v2i64_trap:
   3681   case Intrinsic::nvvm_suld_1d_i64_zero:
   3682   case Intrinsic::nvvm_suld_1d_v2i64_zero:
   3683   case Intrinsic::nvvm_suld_1d_array_i64_zero:
   3684   case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
   3685   case Intrinsic::nvvm_suld_2d_i64_zero:
   3686   case Intrinsic::nvvm_suld_2d_v2i64_zero:
   3687   case Intrinsic::nvvm_suld_2d_array_i64_zero:
   3688   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
   3689   case Intrinsic::nvvm_suld_3d_i64_zero:
   3690   case Intrinsic::nvvm_suld_3d_v2i64_zero: {
   3691     Info.opc = getOpcForSurfaceInstr(Intrinsic);
   3692     Info.memVT = MVT::i64;
   3693     Info.ptrVal = nullptr;
   3694     Info.offset = 0;
   3695     Info.vol = 0;
   3696     Info.readMem = true;
   3697     Info.writeMem = false;
   3698     Info.align = 16;
   3699     return true;
   3700   }
   3701   }
   3702   return false;
   3703 }
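
// Illustrative sketch (not taken from the original source): for a unified-mode
// texture read such as
//   %v = tail call <4 x float> @llvm.nvvm.tex.unified.2d.v4f32.f32(
//            i64 %texHandle, float %x, float %y)
// getTgtMemIntrinsic() above reports a 16-byte-aligned, read-only v4f32 access
// with no known pointer value, so instruction selection treats the intrinsic
// as a load. The exact IR signature shown here is an assumption for
// illustration only.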
   3704 
   3705 /// isLegalAddressingMode - Return true if the addressing mode represented
   3706 /// by AM is legal for this target, for a load/store of the specified type.
   3707 /// Used to guide target specific optimizations, like loop strength reduction
   3708 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
   3709 /// (CodeGenPrepare.cpp)
   3710 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   3711                                                 const AddrMode &AM, Type *Ty,
   3712                                                 unsigned AS) const {
   3713 
   3714   // AddrMode - This represents an addressing mode of:
   3715   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   3716   //
   3717   // The legal address modes are
   3718   // - [avar]
   3719   // - [areg]
   3720   // - [areg+immoff]
   3721   // - [immAddr]
   3722 
   3723   if (AM.BaseGV) {
   3724     return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
   3725   }
   3726 
   3727   switch (AM.Scale) {
   3728   case 0: // "r", "r+i" or "i" is allowed
   3729     break;
   3730   case 1:
   3731     if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
   3732       return false;
   3733     // Otherwise we have r+i.
   3734     break;
   3735   default:
   3736     // No scale > 1 is allowed
   3737     return false;
   3738   }
   3739   return true;
   3740 }
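
// Illustrative consequences of the rules above (assumed examples):
//   [gv]          BaseGV only                         -> legal
//   [reg]         base register, no offset            -> legal
//   [reg+42]      base register plus immediate        -> legal
//   [gv+4]        global plus offset                  -> rejected
//   [reg+reg]     two registers (base + Scale == 1)   -> rejected
//   [4*reg]       scaled register (Scale > 1)         -> rejected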
   3741 
   3742 //===----------------------------------------------------------------------===//
   3743 //                         NVPTX Inline Assembly Support
   3744 //===----------------------------------------------------------------------===//
   3745 
   3746 /// getConstraintType - Given a constraint letter, return the type of
   3747 /// constraint it is for this target.
   3748 NVPTXTargetLowering::ConstraintType
   3749 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
   3750   if (Constraint.size() == 1) {
   3751     switch (Constraint[0]) {
   3752     default:
   3753       break;
   3754     case 'b':
   3755     case 'r':
   3756     case 'h':
   3757     case 'c':
   3758     case 'l':
   3759     case 'f':
   3760     case 'd':
   3761     case '0':
   3762     case 'N':
   3763       return C_RegisterClass;
   3764     }
   3765   }
   3766   return TargetLowering::getConstraintType(Constraint);
   3767 }
   3768 
   3769 std::pair<unsigned, const TargetRegisterClass *>
   3770 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   3771                                                   StringRef Constraint,
   3772                                                   MVT VT) const {
   3773   if (Constraint.size() == 1) {
   3774     switch (Constraint[0]) {
   3775     case 'b':
   3776       return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
   3777     case 'c':
   3778       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
   3779     case 'h':
   3780       return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
   3781     case 'r':
   3782       return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
   3783     case 'l':
   3784     case 'N':
   3785       return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
   3786     case 'f':
   3787       return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
   3788     case 'd':
   3789       return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
   3790     }
   3791   }
   3792   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
   3793 }
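
// Illustrative usage (assumed, CUDA-style inline assembly): the constraint
// letter picks the register class returned above, e.g.
//   asm("mov.u32 %0, %%clock;" : "=r"(x));               // 'r' -> Int32Regs
//   asm("add.f64 %0,%1,%2;" : "=d"(r) : "d"(a), "d"(b)); // 'd' -> Float64Regs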
   3794 
   3795 //===----------------------------------------------------------------------===//
   3796 //                         NVPTX DAG Combining
   3797 //===----------------------------------------------------------------------===//
   3798 
   3799 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
   3800                                    CodeGenOpt::Level OptLevel) const {
   3801   const Function *F = MF.getFunction();
   3802   const TargetOptions &TO = MF.getTarget().Options;
   3803 
   3804   // Always honor command-line argument
   3805   if (FMAContractLevelOpt.getNumOccurrences() > 0) {
   3806     return FMAContractLevelOpt > 0;
   3807   } else if (OptLevel == 0) {
   3808     // Do not contract if we're not optimizing the code
   3809     return false;
   3810   } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
   3811     // Honor TargetOptions flags that explicitly say fusion is okay
   3812     return true;
   3813   } else if (F->hasFnAttribute("unsafe-fp-math")) {
   3814     // Check for unsafe-fp-math=true coming from Clang
   3815     Attribute Attr = F->getFnAttribute("unsafe-fp-math");
   3816     StringRef Val = Attr.getValueAsString();
   3817     if (Val == "true")
   3818       return true;
   3819   }
   3820 
   3821   // We did not have a clear indication that fusion is allowed, so assume not
   3822   return false;
   3823 }
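
// Illustrative summary (assumed spellings): FMA contraction can be enabled or
// disabled from several places checked above, e.g. the FMAContractLevelOpt
// command-line option, the TargetOptions flags (fast FP-op fusion or unsafe
// FP math), or a per-function IR attribute such as
//   attributes #0 = { "unsafe-fp-math"="true" }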
   3824 
   3825 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
   3826 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
   3827 /// called with the default operands, and if that fails, with commuted
   3828 /// operands.
   3829 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
   3830                                            TargetLowering::DAGCombinerInfo &DCI,
   3831                                              const NVPTXSubtarget &Subtarget,
   3832                                              CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip the vector case; this combine handles scalar values only.
  EVT VT = N0.getValueType();
   3836   if (VT.isVector())
   3837     return SDValue();
   3838 
   3839   // fold (add (mul a, b), c) -> (mad a, b, c)
   3840   //
   3841   if (N0.getOpcode() == ISD::MUL) {
    assert(VT.isInteger());
   3843     // For integer:
   3844     // Since integer multiply-add costs the same as integer multiply
   3845     // but is more costly than integer add, do the fusion only when
   3846     // the mul is only used in the add.
   3847     if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
   3848         !N0.getNode()->hasOneUse())
   3849       return SDValue();
   3850 
   3851     // Do the folding
   3852     return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
   3853                        N0.getOperand(0), N0.getOperand(1), N1);
  } else if (N0.getOpcode() == ISD::FMUL) {
   3856     if (VT == MVT::f32 || VT == MVT::f64) {
   3857       const auto *TLI = static_cast<const NVPTXTargetLowering *>(
   3858           &DAG.getTargetLoweringInfo());
   3859       if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
   3860         return SDValue();
   3861 
      // For floating point:
      // Do the fusion only when the mul has fewer than five uses and all of
      // them are adds. The heuristic is that if a use is not an add, that
      // use cannot be fused into an FMA, so the mul is still needed anyway.
      // If there are more than four uses, even if they are all adds, fusing
      // them would increase register pressure.
      //
   3870       int numUses = 0;
   3871       int nonAddCount = 0;
   3872       for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
   3873            UE = N0.getNode()->use_end();
   3874            UI != UE; ++UI) {
   3875         numUses++;
   3876         SDNode *User = *UI;
   3877         if (User->getOpcode() != ISD::FADD)
   3878           ++nonAddCount;
   3879       }
   3880       if (numUses >= 5)
   3881         return SDValue();
   3882       if (nonAddCount) {
   3883         int orderNo = N->getIROrder();
   3884         int orderNo2 = N0.getNode()->getIROrder();
        // A simple heuristic for estimating register pressure: the IR-order
        // difference approximates the distance between the def and this use,
        // and the longer that distance, the more likely fusion is to
        // increase register pressure.
   3889         if (orderNo - orderNo2 < 500)
   3890           return SDValue();
   3891 
        // Now, check if at least one of the FMUL's operands is live beyond
        // node N, which guarantees that the FMA will not increase register
        // pressure at node N.
   3894         bool opIsLive = false;
   3895         const SDNode *left = N0.getOperand(0).getNode();
   3896         const SDNode *right = N0.getOperand(1).getNode();
   3897 
   3898         if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
   3899           opIsLive = true;
   3900 
   3901         if (!opIsLive)
          for (SDNode::use_iterator UI = left->use_begin(),
                                    UE = left->use_end();
               UI != UE; ++UI) {
   3903             SDNode *User = *UI;
   3904             int orderNo3 = User->getIROrder();
   3905             if (orderNo3 > orderNo) {
   3906               opIsLive = true;
   3907               break;
   3908             }
   3909           }
   3910 
   3911         if (!opIsLive)
          for (SDNode::use_iterator UI = right->use_begin(),
                                    UE = right->use_end();
               UI != UE; ++UI) {
   3913             SDNode *User = *UI;
   3914             int orderNo3 = User->getIROrder();
   3915             if (orderNo3 > orderNo) {
   3916               opIsLive = true;
   3917               break;
   3918             }
   3919           }
   3920 
   3921         if (!opIsLive)
   3922           return SDValue();
   3923       }
   3924 
   3925       return DAG.getNode(ISD::FMA, SDLoc(N), VT,
   3926                          N0.getOperand(0), N0.getOperand(1), N1);
   3927     }
   3928   }
   3929 
   3930   return SDValue();
   3931 }
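
// Illustrative example (assumed IR): the integer path above rewrites
//   %t = mul i32 %a, %b
//   %s = add i32 %t, %c        ; %t has no other use
// into a single NVPTXISD::IMAD node, which selects to something like
//   mad.lo.s32 %r, %ra, %rb, %rc;
// The floating-point path forms ISD::FMA only when allowFMA() and the
// use-count/IR-order heuristics above all agree.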
   3932 
/// PerformADDCombine - Target-specific DAG combine xforms for ISD::ADD and
/// ISD::FADD.
   3935 static SDValue PerformADDCombine(SDNode *N,
   3936                                  TargetLowering::DAGCombinerInfo &DCI,
   3937                                  const NVPTXSubtarget &Subtarget,
   3938                                  CodeGenOpt::Level OptLevel) {
   3939   SDValue N0 = N->getOperand(0);
   3940   SDValue N1 = N->getOperand(1);
   3941 
   3942   // First try with the default operand order.
   3943   SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
   3944                                                  OptLevel);
   3945   if (Result.getNode())
   3946     return Result;
   3947 
   3948   // If that didn't work, try again with the operands commuted.
   3949   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
   3950 }
   3951 
   3952 static SDValue PerformANDCombine(SDNode *N,
   3953                                  TargetLowering::DAGCombinerInfo &DCI) {
   3954   // The type legalizer turns a vector load of i8 values into a zextload to i16
   3955   // registers, optionally ANY_EXTENDs it (if target type is integer),
   3956   // and ANDs off the high 8 bits. Since we turn this load into a
   3957   // target-specific DAG node, the DAG combiner fails to eliminate these AND
   3958   // nodes. Do that here.
   3959   SDValue Val = N->getOperand(0);
   3960   SDValue Mask = N->getOperand(1);
   3961 
   3962   if (isa<ConstantSDNode>(Val)) {
   3963     std::swap(Val, Mask);
   3964   }
   3965 
   3966   SDValue AExt;
   3967   // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
   3968   if (Val.getOpcode() == ISD::ANY_EXTEND) {
   3969     AExt = Val;
   3970     Val = Val->getOperand(0);
   3971   }
   3972 
   3973   if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
   3974     Val = Val->getOperand(0);
   3975   }
   3976 
   3977   if (Val->getOpcode() == NVPTXISD::LoadV2 ||
   3978       Val->getOpcode() == NVPTXISD::LoadV4) {
   3979     ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
   3980     if (!MaskCnst) {
   3981       // Not an AND with a constant
   3982       return SDValue();
   3983     }
   3984 
   3985     uint64_t MaskVal = MaskCnst->getZExtValue();
   3986     if (MaskVal != 0xff) {
   3987       // Not an AND that chops off top 8 bits
   3988       return SDValue();
   3989     }
   3990 
   3991     MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
   3992     if (!Mem) {
   3993       // Not a MemSDNode?!?
   3994       return SDValue();
   3995     }
   3996 
   3997     EVT MemVT = Mem->getMemoryVT();
   3998     if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
   3999       // We only handle the i8 case
   4000       return SDValue();
   4001     }
   4002 
   4003     unsigned ExtType =
   4004       cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
   4005         getZExtValue();
   4006     if (ExtType == ISD::SEXTLOAD) {
   4007       // If for some reason the load is a sextload, the and is needed to zero
   4008       // out the high 8 bits
   4009       return SDValue();
   4010     }
   4011 
   4012     bool AddTo = false;
    if (AExt.getNode() != nullptr) {
   4014       // Re-insert the ext as a zext.
   4015       Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
   4016                             AExt.getValueType(), Val);
   4017       AddTo = true;
   4018     }
   4019 
   4020     // If we get here, the AND is unnecessary.  Just replace it with the load
   4021     DCI.CombineTo(N, Val, AddTo);
   4022   }
   4023 
   4024   return SDValue();
   4025 }
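
// Illustrative shape of the pattern handled above (assumed):
//   (and (any_extend (NVPTXISD::LoadV2 ..., zextload v2i8)), 0xff)
// The LoadV2 already zero-extends each i8 lane into its i16 register, so the
// mask is redundant and the AND is folded away in favor of the load value
// (re-extended as a zext when an any_extend was present).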
   4026 
   4027 static SDValue PerformSELECTCombine(SDNode *N,
   4028                                     TargetLowering::DAGCombinerInfo &DCI) {
   4029   // Currently this detects patterns for integer min and max and
   4030   // lowers them to PTX-specific intrinsics that enable hardware
   4031   // support.
   4032 
   4033   const SDValue Cond = N->getOperand(0);
   4034   if (Cond.getOpcode() != ISD::SETCC) return SDValue();
   4035 
   4036   const SDValue LHS = Cond.getOperand(0);
   4037   const SDValue RHS = Cond.getOperand(1);
   4038   const SDValue True = N->getOperand(1);
   4039   const SDValue False = N->getOperand(2);
   4040   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
   4041     return SDValue();
   4042 
   4043   const EVT VT = N->getValueType(0);
   4044   if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
   4045 
   4046   const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
   4047   SDValue Larger;  // The larger of LHS and RHS when condition is true.
   4048   switch (CC) {
   4049     case ISD::SETULT:
   4050     case ISD::SETULE:
   4051     case ISD::SETLT:
   4052     case ISD::SETLE:
   4053       Larger = RHS;
   4054       break;
   4055 
   4056     case ISD::SETGT:
   4057     case ISD::SETGE:
   4058     case ISD::SETUGT:
   4059     case ISD::SETUGE:
   4060       Larger = LHS;
   4061       break;
   4062 
   4063     default:
   4064       return SDValue();
   4065   }
   4066   const bool IsMax = (Larger == True);
   4067   const bool IsSigned = ISD::isSignedIntSetCC(CC);
   4068 
   4069   unsigned IntrinsicId;
   4070   if (VT == MVT::i32) {
   4071     if (IsSigned)
   4072       IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
   4073     else
   4074       IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
   4075   } else {
   4076     assert(VT == MVT::i64);
   4077     if (IsSigned)
   4078       IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
   4079     else
   4080       IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
   4081   }
   4082 
   4083   SDLoc DL(N);
   4084   return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
   4085                          DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
   4086 }
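
// Illustrative example (assumed IR): the combine above turns
//   %c = icmp slt i32 %a, %b
//   %m = select i1 %c, i32 %a, i32 %b
// into a call to the signed 32-bit minimum intrinsic (Intrinsic::nvvm_min_i);
// the unsigned, 64-bit, and greater-than variants map to the corresponding
// nvvm min/max intrinsics chosen above.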
   4087 
   4088 enum OperandSignedness {
   4089   Signed = 0,
   4090   Unsigned,
   4091   Unknown
   4092 };
   4093 
   4094 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
   4095 /// that can be demoted to \p OptSize bits without loss of information. The
   4096 /// signedness of the operand, if determinable, is placed in \p S.
   4097 static bool IsMulWideOperandDemotable(SDValue Op,
   4098                                       unsigned OptSize,
   4099                                       OperandSignedness &S) {
   4100   S = Unknown;
   4101 
   4102   if (Op.getOpcode() == ISD::SIGN_EXTEND ||
   4103       Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   4104     EVT OrigVT = Op.getOperand(0).getValueType();
   4105     if (OrigVT.getSizeInBits() <= OptSize) {
   4106       S = Signed;
   4107       return true;
   4108     }
   4109   } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
   4110     EVT OrigVT = Op.getOperand(0).getValueType();
   4111     if (OrigVT.getSizeInBits() <= OptSize) {
   4112       S = Unsigned;
   4113       return true;
   4114     }
   4115   }
   4116 
   4117   return false;
   4118 }
   4119 
   4120 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
   4121 /// be demoted to \p OptSize bits without loss of information. If the operands
   4122 /// contain a constant, it should appear as the RHS operand. The signedness of
   4123 /// the operands is placed in \p IsSigned.
   4124 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
   4125                                         unsigned OptSize,
   4126                                         bool &IsSigned) {
   4127 
   4128   OperandSignedness LHSSign;
   4129 
   4130   // The LHS operand must be a demotable op
   4131   if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
   4132     return false;
   4133 
   4134   // We should have been able to determine the signedness from the LHS
   4135   if (LHSSign == Unknown)
   4136     return false;
   4137 
   4138   IsSigned = (LHSSign == Signed);
   4139 
   4140   // The RHS can be a demotable op or a constant
   4141   if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
   4142     APInt Val = CI->getAPIntValue();
   4143     if (LHSSign == Unsigned) {
   4144       return Val.isIntN(OptSize);
   4145     } else {
   4146       return Val.isSignedIntN(OptSize);
   4147     }
   4148   } else {
   4149     OperandSignedness RHSSign;
   4150     if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
   4151       return false;
   4152 
   4153     return LHSSign == RHSSign;
   4154   }
   4155 }
   4156 
   4157 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
   4158 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
   4159 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
   4160 /// amount.
   4161 static SDValue TryMULWIDECombine(SDNode *N,
   4162                                  TargetLowering::DAGCombinerInfo &DCI) {
   4163   EVT MulType = N->getValueType(0);
   4164   if (MulType != MVT::i32 && MulType != MVT::i64) {
   4165     return SDValue();
   4166   }
   4167 
   4168   SDLoc DL(N);
   4169   unsigned OptSize = MulType.getSizeInBits() >> 1;
   4170   SDValue LHS = N->getOperand(0);
   4171   SDValue RHS = N->getOperand(1);
   4172 
   4173   // Canonicalize the multiply so the constant (if any) is on the right
   4174   if (N->getOpcode() == ISD::MUL) {
   4175     if (isa<ConstantSDNode>(LHS)) {
   4176       std::swap(LHS, RHS);
   4177     }
   4178   }
   4179 
   4180   // If we have a SHL, determine the actual multiply amount
   4181   if (N->getOpcode() == ISD::SHL) {
   4182     ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
   4183     if (!ShlRHS) {
   4184       return SDValue();
   4185     }
   4186 
   4187     APInt ShiftAmt = ShlRHS->getAPIntValue();
   4188     unsigned BitWidth = MulType.getSizeInBits();
   4189     if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
   4190       APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
   4191       RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
   4192     } else {
   4193       return SDValue();
   4194     }
   4195   }
   4196 
   4197   bool Signed;
   4198   // Verify that our operands are demotable
   4199   if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
   4200     return SDValue();
   4201   }
   4202 
   4203   EVT DemotedVT;
   4204   if (MulType == MVT::i32) {
   4205     DemotedVT = MVT::i16;
   4206   } else {
   4207     DemotedVT = MVT::i32;
   4208   }
   4209 
   4210   // Truncate the operands to the correct size. Note that these are just for
   4211   // type consistency and will (likely) be eliminated in later phases.
   4212   SDValue TruncLHS =
   4213     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
   4214   SDValue TruncRHS =
   4215     DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
   4216 
   4217   unsigned Opc;
   4218   if (Signed) {
   4219     Opc = NVPTXISD::MUL_WIDE_SIGNED;
   4220   } else {
   4221     Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
   4222   }
   4223 
   4224   return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
   4225 }
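
// Illustrative example (assumed IR): in a 32-bit multiply of sign-extended
// 16-bit values,
//   %m = mul i32 (sext i16 %a to i32), (sext i16 %b to i32)
// both operands are demotable to 16 bits, so the node is rebuilt as
//   (NVPTXISD::MUL_WIDE_SIGNED (trunc LHS), (trunc RHS))
// which selects to mul.wide.s16 and produces the full 32-bit product.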
   4226 
   4227 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
   4228 static SDValue PerformMULCombine(SDNode *N,
   4229                                  TargetLowering::DAGCombinerInfo &DCI,
   4230                                  CodeGenOpt::Level OptLevel) {
   4231   if (OptLevel > 0) {
   4232     // Try mul.wide combining at OptLevel > 0
   4233     SDValue Ret = TryMULWIDECombine(N, DCI);
   4234     if (Ret.getNode())
   4235       return Ret;
   4236   }
   4237 
   4238   return SDValue();
   4239 }
   4240 
   4241 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
   4242 static SDValue PerformSHLCombine(SDNode *N,
   4243                                  TargetLowering::DAGCombinerInfo &DCI,
   4244                                  CodeGenOpt::Level OptLevel) {
   4245   if (OptLevel > 0) {
   4246     // Try mul.wide combining at OptLevel > 0
   4247     SDValue Ret = TryMULWIDECombine(N, DCI);
   4248     if (Ret.getNode())
   4249       return Ret;
   4250   }
   4251 
   4252   return SDValue();
   4253 }
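
// Illustrative example (assumed IR): a left shift by a constant is treated as
// a multiply, so
//   %r = shl i32 (zext i16 %a to i32), 3
// is first rewritten with a constant multiplicand of 8 and can then be
// selected as mul.wide.u16, just like an explicit multiply.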
   4254 
   4255 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
   4256                                                DAGCombinerInfo &DCI) const {
   4257   CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
   4258   switch (N->getOpcode()) {
   4259     default: break;
   4260     case ISD::ADD:
   4261     case ISD::FADD:
   4262       return PerformADDCombine(N, DCI, STI, OptLevel);
   4263     case ISD::MUL:
   4264       return PerformMULCombine(N, DCI, OptLevel);
   4265     case ISD::SHL:
   4266       return PerformSHLCombine(N, DCI, OptLevel);
   4267     case ISD::AND:
   4268       return PerformANDCombine(N, DCI);
   4269     case ISD::SELECT:
   4270       return PerformSELECTCombine(N, DCI);
   4271   }
   4272   return SDValue();
   4273 }
   4274 
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
   4276 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   4277                               SmallVectorImpl<SDValue> &Results) {
   4278   EVT ResVT = N->getValueType(0);
   4279   SDLoc DL(N);
   4280 
   4281   assert(ResVT.isVector() && "Vector load must have vector type");
   4282 
   4283   // We only handle "native" vector sizes for now, e.g. <4 x double> is not
   4284   // legal.  We can (and should) split that into 2 loads of <2 x double> here
   4285   // but I'm leaving that as a TODO for now.
   4286   assert(ResVT.isSimple() && "Can only handle simple types");
   4287   switch (ResVT.getSimpleVT().SimpleTy) {
   4288   default:
   4289     return;
   4290   case MVT::v2i8:
   4291   case MVT::v2i16:
   4292   case MVT::v2i32:
   4293   case MVT::v2i64:
   4294   case MVT::v2f32:
   4295   case MVT::v2f64:
   4296   case MVT::v4i8:
   4297   case MVT::v4i16:
   4298   case MVT::v4i32:
   4299   case MVT::v4f32:
   4300     // This is a "native" vector type
   4301     break;
   4302   }
   4303 
   4304   LoadSDNode *LD = cast<LoadSDNode>(N);
   4305 
   4306   unsigned Align = LD->getAlignment();
   4307   auto &TD = DAG.getDataLayout();
   4308   unsigned PrefAlign =
   4309       TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
   4310   if (Align < PrefAlign) {
   4311     // This load is not sufficiently aligned, so bail out and let this vector
   4312     // load be scalarized.  Note that we may still be able to emit smaller
   4313     // vector loads.  For example, if we are loading a <4 x float> with an
   4314     // alignment of 8, this check will fail but the legalizer will try again
   4315     // with 2 x <2 x float>, which will succeed with an alignment of 8.
   4316     return;
   4317   }
   4318 
   4319   EVT EltVT = ResVT.getVectorElementType();
   4320   unsigned NumElts = ResVT.getVectorNumElements();
   4321 
   4322   // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
   4323   // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
   4324   // loaded type to i16 and propagate the "real" type as the memory type.
   4325   bool NeedTrunc = false;
   4326   if (EltVT.getSizeInBits() < 16) {
   4327     EltVT = MVT::i16;
   4328     NeedTrunc = true;
   4329   }
   4330 
   4331   unsigned Opcode = 0;
   4332   SDVTList LdResVTs;
   4333 
   4334   switch (NumElts) {
   4335   default:
   4336     return;
   4337   case 2:
   4338     Opcode = NVPTXISD::LoadV2;
   4339     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
   4340     break;
   4341   case 4: {
   4342     Opcode = NVPTXISD::LoadV4;
   4343     EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
   4344     LdResVTs = DAG.getVTList(ListVTs);
   4345     break;
   4346   }
   4347   }
   4348 
   4349   // Copy regular operands
   4350   SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
   4351 
   4352   // The select routine does not have access to the LoadSDNode instance, so
   4353   // pass along the extension information
   4354   OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
   4355 
   4356   SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
   4357                                           LD->getMemoryVT(),
   4358                                           LD->getMemOperand());
   4359 
   4360   SmallVector<SDValue, 4> ScalarRes;
   4361 
   4362   for (unsigned i = 0; i < NumElts; ++i) {
   4363     SDValue Res = NewLD.getValue(i);
   4364     if (NeedTrunc)
   4365       Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
   4366     ScalarRes.push_back(Res);
   4367   }
   4368 
   4369   SDValue LoadChain = NewLD.getValue(NumElts);
   4370 
   4371   SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
   4372 
   4373   Results.push_back(BuildVec);
   4374   Results.push_back(LoadChain);
   4375 }
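
// Illustrative result (assumed IR): a sufficiently aligned vector load
//   %v = load <4 x float>, <4 x float>* %p, align 16
// becomes a single NVPTXISD::LoadV4 node with four f32 results plus a chain,
// re-packed by the BUILD_VECTOR above, and later selects to a v4 load such
// as ld.global.v4.f32.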
   4376 
   4377 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
   4378                                      SmallVectorImpl<SDValue> &Results) {
   4379   SDValue Chain = N->getOperand(0);
   4380   SDValue Intrin = N->getOperand(1);
   4381   SDLoc DL(N);
   4382 
   4383   // Get the intrinsic ID
   4384   unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
   4385   switch (IntrinNo) {
   4386   default:
   4387     return;
   4388   case Intrinsic::nvvm_ldg_global_i:
   4389   case Intrinsic::nvvm_ldg_global_f:
   4390   case Intrinsic::nvvm_ldg_global_p:
   4391   case Intrinsic::nvvm_ldu_global_i:
   4392   case Intrinsic::nvvm_ldu_global_f:
   4393   case Intrinsic::nvvm_ldu_global_p: {
   4394     EVT ResVT = N->getValueType(0);
   4395 
   4396     if (ResVT.isVector()) {
   4397       // Vector LDG/LDU
   4398 
   4399       unsigned NumElts = ResVT.getVectorNumElements();
   4400       EVT EltVT = ResVT.getVectorElementType();
   4401 
   4402       // Since LDU/LDG are target nodes, we cannot rely on DAG type
   4403       // legalization.
   4404       // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
   4405       // loaded type to i16 and propagate the "real" type as the memory type.
   4406       bool NeedTrunc = false;
   4407       if (EltVT.getSizeInBits() < 16) {
   4408         EltVT = MVT::i16;
   4409         NeedTrunc = true;
   4410       }
   4411 
   4412       unsigned Opcode = 0;
   4413       SDVTList LdResVTs;
   4414 
   4415       switch (NumElts) {
   4416       default:
   4417         return;
   4418       case 2:
   4419         switch (IntrinNo) {
   4420         default:
   4421           return;
   4422         case Intrinsic::nvvm_ldg_global_i:
   4423         case Intrinsic::nvvm_ldg_global_f:
   4424         case Intrinsic::nvvm_ldg_global_p:
   4425           Opcode = NVPTXISD::LDGV2;
   4426           break;
   4427         case Intrinsic::nvvm_ldu_global_i:
   4428         case Intrinsic::nvvm_ldu_global_f:
   4429         case Intrinsic::nvvm_ldu_global_p:
   4430           Opcode = NVPTXISD::LDUV2;
   4431           break;
   4432         }
   4433         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
   4434         break;
   4435       case 4: {
   4436         switch (IntrinNo) {
   4437         default:
   4438           return;
   4439         case Intrinsic::nvvm_ldg_global_i:
   4440         case Intrinsic::nvvm_ldg_global_f:
   4441         case Intrinsic::nvvm_ldg_global_p:
   4442           Opcode = NVPTXISD::LDGV4;
   4443           break;
   4444         case Intrinsic::nvvm_ldu_global_i:
   4445         case Intrinsic::nvvm_ldu_global_f:
   4446         case Intrinsic::nvvm_ldu_global_p:
   4447           Opcode = NVPTXISD::LDUV4;
   4448           break;
   4449         }
   4450         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
   4451         LdResVTs = DAG.getVTList(ListVTs);
   4452         break;
   4453       }
   4454       }
   4455 
   4456       SmallVector<SDValue, 8> OtherOps;
   4457 
      // Copy regular operands: the chain first, then everything after the
      // intrinsic-ID operand (operand 1), which is dropped.
      OtherOps.push_back(Chain);
      OtherOps.append(N->op_begin() + 2, N->op_end());
   4464 
   4465       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
   4466 
   4467       SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
   4468                                               MemSD->getMemoryVT(),
   4469                                               MemSD->getMemOperand());
   4470 
   4471       SmallVector<SDValue, 4> ScalarRes;
   4472 
   4473       for (unsigned i = 0; i < NumElts; ++i) {
   4474         SDValue Res = NewLD.getValue(i);
   4475         if (NeedTrunc)
   4476           Res =
   4477               DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
   4478         ScalarRes.push_back(Res);
   4479       }
   4480 
   4481       SDValue LoadChain = NewLD.getValue(NumElts);
   4482 
   4483       SDValue BuildVec =
   4484           DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
   4485 
   4486       Results.push_back(BuildVec);
   4487       Results.push_back(LoadChain);
   4488     } else {
   4489       // i8 LDG/LDU
   4490       assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
   4491              "Custom handling of non-i8 ldu/ldg?");
   4492 
   4493       // Just copy all operands as-is
   4494       SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
   4495 
   4496       // Force output to i16
   4497       SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
   4498 
   4499       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
   4500 
   4501       // We make sure the memory type is i8, which will be used during isel
   4502       // to select the proper instruction.
   4503       SDValue NewLD =
   4504           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
   4505                                   MVT::i8, MemSD->getMemOperand());
   4506 
   4507       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
   4508                                     NewLD.getValue(0)));
   4509       Results.push_back(NewLD.getValue(1));
   4510     }
   4511   }
   4512   }
   4513 }
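
// Illustrative example (assumed IR signature): a read-only-cache vector load
//   %v = tail call <2 x float> @llvm.nvvm.ldg.global.f(<2 x float>* %p, i32 8)
// is replaced above by an NVPTXISD::LDGV2 node with two f32 results plus a
// chain, while a scalar i8 ldg/ldu is widened to an i16 result and then
// truncated back to i8.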
   4514 
   4515 void NVPTXTargetLowering::ReplaceNodeResults(
   4516     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   4517   switch (N->getOpcode()) {
   4518   default:
   4519     report_fatal_error("Unhandled custom legalization");
   4520   case ISD::LOAD:
   4521     ReplaceLoadVector(N, DAG, Results);
   4522     return;
   4523   case ISD::INTRINSIC_W_CHAIN:
   4524     ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
   4525     return;
   4526   }
   4527 }
   4528 
   4529 // Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
   4530 void NVPTXSection::anchor() {}
   4531 
   4532 NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
   4533   delete static_cast<NVPTXSection *>(TextSection);
   4534   delete static_cast<NVPTXSection *>(DataSection);
   4535   delete static_cast<NVPTXSection *>(BSSSection);
   4536   delete static_cast<NVPTXSection *>(ReadOnlySection);
   4537 
   4538   delete static_cast<NVPTXSection *>(StaticCtorSection);
   4539   delete static_cast<NVPTXSection *>(StaticDtorSection);
   4540   delete static_cast<NVPTXSection *>(LSDASection);
   4541   delete static_cast<NVPTXSection *>(EHFrameSection);
   4542   delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
   4543   delete static_cast<NVPTXSection *>(DwarfInfoSection);
   4544   delete static_cast<NVPTXSection *>(DwarfLineSection);
   4545   delete static_cast<NVPTXSection *>(DwarfFrameSection);
   4546   delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
   4547   delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
   4548   delete static_cast<NVPTXSection *>(DwarfStrSection);
   4549   delete static_cast<NVPTXSection *>(DwarfLocSection);
   4550   delete static_cast<NVPTXSection *>(DwarfARangesSection);
   4551   delete static_cast<NVPTXSection *>(DwarfRangesSection);
   4552 }
   4553 
   4554 MCSection *
   4555 NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
   4556                                               SectionKind Kind, Mangler &Mang,
   4557                                               const TargetMachine &TM) const {
   4558   return getDataSection();
   4559 }
   4560