Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 
     15 #include "NVPTXISelDAGToDAG.h"
     16 #include "llvm/IR/GlobalValue.h"
     17 #include "llvm/IR/Instructions.h"
     18 #include "llvm/Support/CommandLine.h"
     19 #include "llvm/Support/Debug.h"
     20 #include "llvm/Support/ErrorHandling.h"
     21 #include "llvm/Support/raw_ostream.h"
     22 #include "llvm/Target/TargetIntrinsicInfo.h"
     23 
     24 #undef DEBUG_TYPE
     25 #define DEBUG_TYPE "nvptx-isel"
     26 
     27 using namespace llvm;
     28 
     29 
     30 static cl::opt<bool>
     31 UseFMADInstruction("nvptx-mad-enable",
     32                    cl::ZeroOrMore,
     33                 cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
     34                    cl::init(false));
     35 
     36 static cl::opt<int>
     37 FMAContractLevel("nvptx-fma-level",
     38                  cl::ZeroOrMore,
     39                  cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
     40                      " 1: do it  2: do it aggressively"),
     41                      cl::init(2));
     42 
     43 
     44 static cl::opt<int>
     45 UsePrecDivF32("nvptx-prec-divf32",
     46               cl::ZeroOrMore,
     47              cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     48                   " IEEE Compliant F32 div.rnd if avaiable."),
     49                   cl::init(2));
     50 
     51 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     52 /// NVPTX-specific DAG, ready for instruction scheduling.
     53 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     54                                        llvm::CodeGenOpt::Level OptLevel) {
     55   return new NVPTXDAGToDAGISel(TM, OptLevel);
     56 }
     57 
     58 
     59 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     60                                      CodeGenOpt::Level OptLevel)
     61 : SelectionDAGISel(tm, OptLevel),
     62   Subtarget(tm.getSubtarget<NVPTXSubtarget>())
     63 {
     64   // Always do fma.f32 fpcontract if the target supports the instruction.
     65   // Always do fma.f64 fpcontract if the target supports the instruction.
     66   // Do mad.f32 is nvptx-mad-enable is specified and the target does not
     67   // support fma.f32.
     68 
     69   doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
     70   doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
     71       (FMAContractLevel>=1);
     72   doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
     73       (FMAContractLevel>=1);
     74   doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
     75       (FMAContractLevel==2);
     76   doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
     77       (FMAContractLevel==2);
     78 
     79   allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
     80 
     81   UseF32FTZ = false;
     82 
     83   doMulWide = (OptLevel > 0);
     84 
     85   // Decide how to translate f32 div
     86   do_DIVF32_PREC = UsePrecDivF32;
     87   // sm less than sm_20 does not support div.rnd. Use div.full.
     88   if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
     89     do_DIVF32_PREC = 1;
     90 
     91 }
     92 
     93 /// Select - Select instructions not customized! Used for
     94 /// expanded, promoted and normal instructions.
     95 SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
     96 
     97   if (N->isMachineOpcode())
     98     return NULL;   // Already selected.
     99 
    100   SDNode *ResNode = NULL;
    101   switch (N->getOpcode()) {
    102   case ISD::LOAD:
    103     ResNode = SelectLoad(N);
    104     break;
    105   case ISD::STORE:
    106     ResNode = SelectStore(N);
    107     break;
    108   case NVPTXISD::LoadV2:
    109   case NVPTXISD::LoadV4:
    110     ResNode = SelectLoadVector(N);
    111     break;
    112   case NVPTXISD::LDGV2:
    113   case NVPTXISD::LDGV4:
    114   case NVPTXISD::LDUV2:
    115   case NVPTXISD::LDUV4:
    116     ResNode = SelectLDGLDUVector(N);
    117     break;
    118   case NVPTXISD::StoreV2:
    119   case NVPTXISD::StoreV4:
    120     ResNode = SelectStoreVector(N);
    121     break;
    122   default: break;
    123   }
    124   if (ResNode)
    125     return ResNode;
    126   return SelectCode(N);
    127 }
    128 
    129 
    130 static unsigned int
    131 getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
    132 {
    133   const Value *Src = N->getSrcValue();
    134   if (!Src)
    135     return NVPTX::PTXLdStInstCode::LOCAL;
    136 
    137   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    138     switch (PT->getAddressSpace()) {
    139     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    140     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    141     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    142     case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
    143       return NVPTX::PTXLdStInstCode::CONSTANT;
    144     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    145     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    146     case llvm::ADDRESS_SPACE_CONST:
    147       // If the arch supports generic address space, translate it to GLOBAL
    148       // for correctness.
    149       // If the arch does not support generic address space, then the arch
    150       // does not really support ADDRESS_SPACE_CONST, translate it to
    151       // to CONSTANT for better performance.
    152       if (Subtarget.hasGenericLdSt())
    153         return NVPTX::PTXLdStInstCode::GLOBAL;
    154       else
    155         return NVPTX::PTXLdStInstCode::CONSTANT;
    156     default: break;
    157     }
    158   }
    159   return NVPTX::PTXLdStInstCode::LOCAL;
    160 }
    161 
    162 
    163 SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    164   DebugLoc dl = N->getDebugLoc();
    165   LoadSDNode *LD = cast<LoadSDNode>(N);
    166   EVT LoadedVT = LD->getMemoryVT();
    167   SDNode *NVPTXLD= NULL;
    168 
    169   // do not support pre/post inc/dec
    170   if (LD->isIndexed())
    171     return NULL;
    172 
    173   if (!LoadedVT.isSimple())
    174     return NULL;
    175 
    176   // Address Space Setting
    177   unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
    178 
    179   // Volatile Setting
    180   // - .volatile is only availalble for .global and .shared
    181   bool isVolatile = LD->isVolatile();
    182   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    183       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    184       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    185     isVolatile = false;
    186 
    187   // Vector Setting
    188   MVT SimpleVT = LoadedVT.getSimpleVT();
    189   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    190   if (SimpleVT.isVector()) {
    191     unsigned num = SimpleVT.getVectorNumElements();
    192     if (num == 2)
    193       vecType = NVPTX::PTXLdStInstCode::V2;
    194     else if (num == 4)
    195       vecType = NVPTX::PTXLdStInstCode::V4;
    196     else
    197       return NULL;
    198   }
    199 
    200   // Type Setting: fromType + fromTypeWidth
    201   //
    202   // Sign   : ISD::SEXTLOAD
    203   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    204   //          type is integer
    205   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    206   MVT ScalarVT = SimpleVT.getScalarType();
    207   unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
    208   unsigned int fromType;
    209   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    210     fromType = NVPTX::PTXLdStInstCode::Signed;
    211   else if (ScalarVT.isFloatingPoint())
    212     fromType = NVPTX::PTXLdStInstCode::Float;
    213   else
    214     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    215 
    216   // Create the machine instruction DAG
    217   SDValue Chain = N->getOperand(0);
    218   SDValue N1 = N->getOperand(1);
    219   SDValue Addr;
    220   SDValue Offset, Base;
    221   unsigned Opcode;
    222   MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
    223 
    224   if (SelectDirectAddr(N1, Addr)) {
    225     switch (TargetVT) {
    226     case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
    227     case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
    228     case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
    229     case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
    230     case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
    231     case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
    232     default: return NULL;
    233     }
    234     SDValue Ops[] = { getI32Imm(isVolatile),
    235                       getI32Imm(codeAddrSpace),
    236                       getI32Imm(vecType),
    237                       getI32Imm(fromType),
    238                       getI32Imm(fromTypeWidth),
    239                       Addr, Chain };
    240     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    241                                      MVT::Other, Ops, 7);
    242   } else if (Subtarget.is64Bit()?
    243       SelectADDRsi64(N1.getNode(), N1, Base, Offset):
    244       SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    245     switch (TargetVT) {
    246     case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
    247     case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
    248     case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
    249     case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
    250     case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
    251     case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
    252     default: return NULL;
    253     }
    254     SDValue Ops[] = { getI32Imm(isVolatile),
    255                       getI32Imm(codeAddrSpace),
    256                       getI32Imm(vecType),
    257                       getI32Imm(fromType),
    258                       getI32Imm(fromTypeWidth),
    259                       Base, Offset, Chain };
    260     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    261                                      MVT::Other, Ops, 8);
    262   } else if (Subtarget.is64Bit()?
    263       SelectADDRri64(N1.getNode(), N1, Base, Offset):
    264       SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    265     if (Subtarget.is64Bit()) {
    266       switch (TargetVT) {
    267       case MVT::i8:    Opcode = NVPTX::LD_i8_ari_64; break;
    268       case MVT::i16:   Opcode = NVPTX::LD_i16_ari_64; break;
    269       case MVT::i32:   Opcode = NVPTX::LD_i32_ari_64; break;
    270       case MVT::i64:   Opcode = NVPTX::LD_i64_ari_64; break;
    271       case MVT::f32:   Opcode = NVPTX::LD_f32_ari_64; break;
    272       case MVT::f64:   Opcode = NVPTX::LD_f64_ari_64; break;
    273       default: return NULL;
    274       }
    275     } else {
    276       switch (TargetVT) {
    277       case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
    278       case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
    279       case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
    280       case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
    281       case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
    282       case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
    283       default: return NULL;
    284       }
    285     }
    286     SDValue Ops[] = { getI32Imm(isVolatile),
    287                       getI32Imm(codeAddrSpace),
    288                       getI32Imm(vecType),
    289                       getI32Imm(fromType),
    290                       getI32Imm(fromTypeWidth),
    291                       Base, Offset, Chain };
    292     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    293                                      MVT::Other, Ops, 8);
    294   }
    295   else {
    296     if (Subtarget.is64Bit()) {
    297       switch (TargetVT) {
    298       case MVT::i8:    Opcode = NVPTX::LD_i8_areg_64; break;
    299       case MVT::i16:   Opcode = NVPTX::LD_i16_areg_64; break;
    300       case MVT::i32:   Opcode = NVPTX::LD_i32_areg_64; break;
    301       case MVT::i64:   Opcode = NVPTX::LD_i64_areg_64; break;
    302       case MVT::f32:   Opcode = NVPTX::LD_f32_areg_64; break;
    303       case MVT::f64:   Opcode = NVPTX::LD_f64_areg_64; break;
    304       default: return NULL;
    305       }
    306     } else {
    307       switch (TargetVT) {
    308       case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
    309       case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
    310       case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
    311       case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
    312       case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
    313       case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
    314       default: return NULL;
    315       }
    316     }
    317     SDValue Ops[] = { getI32Imm(isVolatile),
    318                       getI32Imm(codeAddrSpace),
    319                       getI32Imm(vecType),
    320                       getI32Imm(fromType),
    321                       getI32Imm(fromTypeWidth),
    322                       N1, Chain };
    323     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    324                                      MVT::Other, Ops, 7);
    325   }
    326 
    327   if (NVPTXLD != NULL) {
    328     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    329     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    330     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    331   }
    332 
    333   return NVPTXLD;
    334 }
    335 
    336 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
    337 
    338   SDValue Chain = N->getOperand(0);
    339   SDValue Op1 = N->getOperand(1);
    340   SDValue Addr, Offset, Base;
    341   unsigned Opcode;
    342   DebugLoc DL = N->getDebugLoc();
    343   SDNode *LD;
    344   MemSDNode *MemSD = cast<MemSDNode>(N);
    345   EVT LoadedVT = MemSD->getMemoryVT();
    346 
    347 
    348   if (!LoadedVT.isSimple())
    349      return NULL;
    350 
    351   // Address Space Setting
    352   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
    353 
    354   // Volatile Setting
    355   // - .volatile is only availalble for .global and .shared
    356   bool IsVolatile = MemSD->isVolatile();
    357   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    358       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    359       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    360     IsVolatile = false;
    361 
    362   // Vector Setting
    363   MVT SimpleVT = LoadedVT.getSimpleVT();
    364 
    365   // Type Setting: fromType + fromTypeWidth
    366   //
    367   // Sign   : ISD::SEXTLOAD
    368   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    369   //          type is integer
    370   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    371   MVT ScalarVT = SimpleVT.getScalarType();
    372   unsigned FromTypeWidth =  ScalarVT.getSizeInBits();
    373   unsigned int FromType;
    374   // The last operand holds the original LoadSDNode::getExtensionType() value
    375   unsigned ExtensionType =
    376     cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
    377   if (ExtensionType == ISD::SEXTLOAD)
    378     FromType = NVPTX::PTXLdStInstCode::Signed;
    379   else if (ScalarVT.isFloatingPoint())
    380     FromType = NVPTX::PTXLdStInstCode::Float;
    381   else
    382     FromType = NVPTX::PTXLdStInstCode::Unsigned;
    383 
    384   unsigned VecType;
    385 
    386   switch (N->getOpcode()) {
    387   case NVPTXISD::LoadV2:  VecType = NVPTX::PTXLdStInstCode::V2; break;
    388   case NVPTXISD::LoadV4:  VecType = NVPTX::PTXLdStInstCode::V4; break;
    389   default: return NULL;
    390   }
    391 
    392   EVT EltVT = N->getValueType(0);
    393 
    394   if (SelectDirectAddr(Op1, Addr)) {
    395     switch (N->getOpcode()) {
    396     default: return NULL;
    397     case NVPTXISD::LoadV2:
    398       switch (EltVT.getSimpleVT().SimpleTy) {
    399       default: return NULL;
    400       case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_avar; break;
    401       case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_avar; break;
    402       case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_avar; break;
    403       case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_avar; break;
    404       case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_avar; break;
    405       case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_avar; break;
    406       }
    407       break;
    408     case NVPTXISD::LoadV4:
    409       switch (EltVT.getSimpleVT().SimpleTy) {
    410       default: return NULL;
    411       case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_avar; break;
    412       case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_avar; break;
    413       case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_avar; break;
    414       case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_avar; break;
    415       }
    416       break;
    417     }
    418 
    419     SDValue Ops[] = { getI32Imm(IsVolatile),
    420                       getI32Imm(CodeAddrSpace),
    421                       getI32Imm(VecType),
    422                       getI32Imm(FromType),
    423                       getI32Imm(FromTypeWidth),
    424                       Addr, Chain };
    425     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
    426   } else if (Subtarget.is64Bit()?
    427              SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
    428              SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    429     switch (N->getOpcode()) {
    430     default: return NULL;
    431     case NVPTXISD::LoadV2:
    432       switch (EltVT.getSimpleVT().SimpleTy) {
    433       default: return NULL;
    434       case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_asi; break;
    435       case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_asi; break;
    436       case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_asi; break;
    437       case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_asi; break;
    438       case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_asi; break;
    439       case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_asi; break;
    440       }
    441       break;
    442     case NVPTXISD::LoadV4:
    443       switch (EltVT.getSimpleVT().SimpleTy) {
    444       default: return NULL;
    445       case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_asi; break;
    446       case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_asi; break;
    447       case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_asi; break;
    448       case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_asi; break;
    449       }
    450       break;
    451     }
    452 
    453     SDValue Ops[] = { getI32Imm(IsVolatile),
    454                       getI32Imm(CodeAddrSpace),
    455                       getI32Imm(VecType),
    456                       getI32Imm(FromType),
    457                       getI32Imm(FromTypeWidth),
    458                       Base, Offset, Chain };
    459     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
    460   } else if (Subtarget.is64Bit()?
    461              SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
    462              SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    463     if (Subtarget.is64Bit()) {
    464       switch (N->getOpcode()) {
    465       default: return NULL;
    466       case NVPTXISD::LoadV2:
    467         switch (EltVT.getSimpleVT().SimpleTy) {
    468         default: return NULL;
    469         case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari_64; break;
    470         case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari_64; break;
    471         case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari_64; break;
    472         case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari_64; break;
    473         case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari_64; break;
    474         case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari_64; break;
    475         }
    476         break;
    477       case NVPTXISD::LoadV4:
    478         switch (EltVT.getSimpleVT().SimpleTy) {
    479         default: return NULL;
    480         case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari_64; break;
    481         case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari_64; break;
    482         case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari_64; break;
    483         case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari_64; break;
    484         }
    485         break;
    486       }
    487     } else {
    488       switch (N->getOpcode()) {
    489       default: return NULL;
    490       case NVPTXISD::LoadV2:
    491         switch (EltVT.getSimpleVT().SimpleTy) {
    492         default: return NULL;
    493         case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari; break;
    494         case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari; break;
    495         case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari; break;
    496         case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari; break;
    497         case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari; break;
    498         case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari; break;
    499         }
    500         break;
    501       case NVPTXISD::LoadV4:
    502         switch (EltVT.getSimpleVT().SimpleTy) {
    503         default: return NULL;
    504         case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari; break;
    505         case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari; break;
    506         case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari; break;
    507         case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari; break;
    508         }
    509         break;
    510       }
    511     }
    512 
    513     SDValue Ops[] = { getI32Imm(IsVolatile),
    514                       getI32Imm(CodeAddrSpace),
    515                       getI32Imm(VecType),
    516                       getI32Imm(FromType),
    517                       getI32Imm(FromTypeWidth),
    518                       Base, Offset, Chain };
    519 
    520     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
    521   } else {
    522     if (Subtarget.is64Bit()) {
    523       switch (N->getOpcode()) {
    524       default: return NULL;
    525       case NVPTXISD::LoadV2:
    526         switch (EltVT.getSimpleVT().SimpleTy) {
    527         default: return NULL;
    528         case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg_64; break;
    529         case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg_64; break;
    530         case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg_64; break;
    531         case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg_64; break;
    532         case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg_64; break;
    533         case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg_64; break;
    534         }
    535         break;
    536       case NVPTXISD::LoadV4:
    537         switch (EltVT.getSimpleVT().SimpleTy) {
    538         default: return NULL;
    539         case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg_64; break;
    540         case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg_64; break;
    541         case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg_64; break;
    542         case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg_64; break;
    543         }
    544         break;
    545       }
    546     } else {
    547       switch (N->getOpcode()) {
    548       default: return NULL;
    549       case NVPTXISD::LoadV2:
    550         switch (EltVT.getSimpleVT().SimpleTy) {
    551         default: return NULL;
    552         case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg; break;
    553         case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg; break;
    554         case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg; break;
    555         case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg; break;
    556         case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg; break;
    557         case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg; break;
    558         }
    559         break;
    560       case NVPTXISD::LoadV4:
    561         switch (EltVT.getSimpleVT().SimpleTy) {
    562         default: return NULL;
    563         case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg; break;
    564         case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg; break;
    565         case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg; break;
    566         case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg; break;
    567         }
    568         break;
    569       }
    570     }
    571 
    572     SDValue Ops[] = { getI32Imm(IsVolatile),
    573                       getI32Imm(CodeAddrSpace),
    574                       getI32Imm(VecType),
    575                       getI32Imm(FromType),
    576                       getI32Imm(FromTypeWidth),
    577                       Op1, Chain };
    578     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
    579   }
    580 
    581   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    582   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    583   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    584 
    585   return LD;
    586 }
    587 
    588 SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
    589 
    590   SDValue Chain = N->getOperand(0);
    591   SDValue Op1 = N->getOperand(1);
    592   unsigned Opcode;
    593   DebugLoc DL = N->getDebugLoc();
    594   SDNode *LD;
    595 
    596   EVT RetVT = N->getValueType(0);
    597 
    598   // Select opcode
    599   if (Subtarget.is64Bit()) {
    600     switch (N->getOpcode()) {
    601     default: return NULL;
    602     case NVPTXISD::LDGV2:
    603       switch (RetVT.getSimpleVT().SimpleTy) {
    604       default: return NULL;
    605       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
    606       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
    607       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
    608       case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
    609       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
    610       case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
    611       }
    612       break;
    613     case NVPTXISD::LDGV4:
    614       switch (RetVT.getSimpleVT().SimpleTy) {
    615       default: return NULL;
    616       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
    617       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
    618       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
    619       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
    620       }
    621       break;
    622     case NVPTXISD::LDUV2:
    623       switch (RetVT.getSimpleVT().SimpleTy) {
    624       default: return NULL;
    625       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
    626       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
    627       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
    628       case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
    629       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
    630       case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
    631       }
    632       break;
    633     case NVPTXISD::LDUV4:
    634       switch (RetVT.getSimpleVT().SimpleTy) {
    635       default: return NULL;
    636       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
    637       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
    638       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
    639       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
    640       }
    641       break;
    642     }
    643   } else {
    644     switch (N->getOpcode()) {
    645     default: return NULL;
    646     case NVPTXISD::LDGV2:
    647       switch (RetVT.getSimpleVT().SimpleTy) {
    648       default: return NULL;
    649       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
    650       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
    651       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
    652       case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
    653       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
    654       case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
    655       }
    656       break;
    657     case NVPTXISD::LDGV4:
    658       switch (RetVT.getSimpleVT().SimpleTy) {
    659       default: return NULL;
    660       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
    661       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
    662       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
    663       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
    664       }
    665       break;
    666     case NVPTXISD::LDUV2:
    667       switch (RetVT.getSimpleVT().SimpleTy) {
    668       default: return NULL;
    669       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
    670       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
    671       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
    672       case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
    673       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
    674       case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
    675       }
    676       break;
    677     case NVPTXISD::LDUV4:
    678       switch (RetVT.getSimpleVT().SimpleTy) {
    679       default: return NULL;
    680       case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
    681       case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
    682       case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
    683       case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
    684       }
    685       break;
    686     }
    687   }
    688 
    689   SDValue Ops[] = { Op1, Chain };
    690   LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
    691 
    692   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    693   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    694   cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    695 
    696   return LD;
    697 }
    698 
    699 
    700 SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
    701   DebugLoc dl = N->getDebugLoc();
    702   StoreSDNode *ST = cast<StoreSDNode>(N);
    703   EVT StoreVT = ST->getMemoryVT();
    704   SDNode *NVPTXST = NULL;
    705 
    706   // do not support pre/post inc/dec
    707   if (ST->isIndexed())
    708     return NULL;
    709 
    710   if (!StoreVT.isSimple())
    711     return NULL;
    712 
    713   // Address Space Setting
    714   unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
    715 
    716   // Volatile Setting
    717   // - .volatile is only availalble for .global and .shared
    718   bool isVolatile = ST->isVolatile();
    719   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    720       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    721       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    722     isVolatile = false;
    723 
    724   // Vector Setting
    725   MVT SimpleVT = StoreVT.getSimpleVT();
    726   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    727   if (SimpleVT.isVector()) {
    728     unsigned num = SimpleVT.getVectorNumElements();
    729     if (num == 2)
    730       vecType = NVPTX::PTXLdStInstCode::V2;
    731     else if (num == 4)
    732       vecType = NVPTX::PTXLdStInstCode::V4;
    733     else
    734       return NULL;
    735   }
    736 
    737   // Type Setting: toType + toTypeWidth
    738   // - for integer type, always use 'u'
    739   //
    740   MVT ScalarVT = SimpleVT.getScalarType();
    741   unsigned toTypeWidth =  ScalarVT.getSizeInBits();
    742   unsigned int toType;
    743   if (ScalarVT.isFloatingPoint())
    744     toType = NVPTX::PTXLdStInstCode::Float;
    745   else
    746     toType = NVPTX::PTXLdStInstCode::Unsigned;
    747 
    748   // Create the machine instruction DAG
    749   SDValue Chain = N->getOperand(0);
    750   SDValue N1 = N->getOperand(1);
    751   SDValue N2 = N->getOperand(2);
    752   SDValue Addr;
    753   SDValue Offset, Base;
    754   unsigned Opcode;
    755   MVT::SimpleValueType SourceVT =
    756       N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
    757 
    758   if (SelectDirectAddr(N2, Addr)) {
    759     switch (SourceVT) {
    760     case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
    761     case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
    762     case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
    763     case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
    764     case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
    765     case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
    766     default: return NULL;
    767     }
    768     SDValue Ops[] = { N1,
    769                       getI32Imm(isVolatile),
    770                       getI32Imm(codeAddrSpace),
    771                       getI32Imm(vecType),
    772                       getI32Imm(toType),
    773                       getI32Imm(toTypeWidth),
    774                       Addr, Chain };
    775     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    776                                      MVT::Other, Ops, 8);
    777   } else if (Subtarget.is64Bit()?
    778       SelectADDRsi64(N2.getNode(), N2, Base, Offset):
    779       SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    780     switch (SourceVT) {
    781     case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
    782     case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
    783     case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
    784     case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
    785     case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
    786     case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
    787     default: return NULL;
    788     }
    789     SDValue Ops[] = { N1,
    790                       getI32Imm(isVolatile),
    791                       getI32Imm(codeAddrSpace),
    792                       getI32Imm(vecType),
    793                       getI32Imm(toType),
    794                       getI32Imm(toTypeWidth),
    795                       Base, Offset, Chain };
    796     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    797                                      MVT::Other, Ops, 9);
    798   } else if (Subtarget.is64Bit()?
    799       SelectADDRri64(N2.getNode(), N2, Base, Offset):
    800       SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    801     if (Subtarget.is64Bit()) {
    802       switch (SourceVT) {
    803       case MVT::i8:    Opcode = NVPTX::ST_i8_ari_64; break;
    804       case MVT::i16:   Opcode = NVPTX::ST_i16_ari_64; break;
    805       case MVT::i32:   Opcode = NVPTX::ST_i32_ari_64; break;
    806       case MVT::i64:   Opcode = NVPTX::ST_i64_ari_64; break;
    807       case MVT::f32:   Opcode = NVPTX::ST_f32_ari_64; break;
    808       case MVT::f64:   Opcode = NVPTX::ST_f64_ari_64; break;
    809       default: return NULL;
    810       }
    811     } else {
    812       switch (SourceVT) {
    813       case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
    814       case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
    815       case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
    816       case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
    817       case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
    818       case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
    819       default: return NULL;
    820       }
    821     }
    822     SDValue Ops[] = { N1,
    823                       getI32Imm(isVolatile),
    824                       getI32Imm(codeAddrSpace),
    825                       getI32Imm(vecType),
    826                       getI32Imm(toType),
    827                       getI32Imm(toTypeWidth),
    828                       Base, Offset, Chain };
    829     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    830                                      MVT::Other, Ops, 9);
    831   } else {
    832     if (Subtarget.is64Bit()) {
    833       switch (SourceVT) {
    834       case MVT::i8:    Opcode = NVPTX::ST_i8_areg_64; break;
    835       case MVT::i16:   Opcode = NVPTX::ST_i16_areg_64; break;
    836       case MVT::i32:   Opcode = NVPTX::ST_i32_areg_64; break;
    837       case MVT::i64:   Opcode = NVPTX::ST_i64_areg_64; break;
    838       case MVT::f32:   Opcode = NVPTX::ST_f32_areg_64; break;
    839       case MVT::f64:   Opcode = NVPTX::ST_f64_areg_64; break;
    840       default: return NULL;
    841       }
    842     } else {
    843       switch (SourceVT) {
    844       case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
    845       case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
    846       case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
    847       case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
    848       case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
    849       case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
    850       default: return NULL;
    851       }
    852     }
    853     SDValue Ops[] = { N1,
    854                       getI32Imm(isVolatile),
    855                       getI32Imm(codeAddrSpace),
    856                       getI32Imm(vecType),
    857                       getI32Imm(toType),
    858                       getI32Imm(toTypeWidth),
    859                       N2, Chain };
    860     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    861                                      MVT::Other, Ops, 8);
    862   }
    863 
    864   if (NVPTXST != NULL) {
    865     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    866     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    867     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
    868   }
    869 
    870   return NVPTXST;
    871 }
    872 
    873 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
    874   SDValue Chain = N->getOperand(0);
    875   SDValue Op1 = N->getOperand(1);
    876   SDValue Addr, Offset, Base;
    877   unsigned Opcode;
    878   DebugLoc DL = N->getDebugLoc();
    879   SDNode *ST;
    880   EVT EltVT = Op1.getValueType();
    881   MemSDNode *MemSD = cast<MemSDNode>(N);
    882   EVT StoreVT = MemSD->getMemoryVT();
    883 
    884   // Address Space Setting
    885   unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
    886 
    887   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    888     report_fatal_error("Cannot store to pointer that points to constant "
    889                        "memory space");
    890   }
    891 
    892   // Volatile Setting
    893   // - .volatile is only availalble for .global and .shared
    894   bool IsVolatile = MemSD->isVolatile();
    895   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    896       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    897       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    898     IsVolatile = false;
    899 
    900   // Type Setting: toType + toTypeWidth
    901   // - for integer type, always use 'u'
    902   assert(StoreVT.isSimple() && "Store value is not simple");
    903   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
    904   unsigned ToTypeWidth =  ScalarVT.getSizeInBits();
    905   unsigned ToType;
    906   if (ScalarVT.isFloatingPoint())
    907     ToType = NVPTX::PTXLdStInstCode::Float;
    908   else
    909     ToType = NVPTX::PTXLdStInstCode::Unsigned;
    910 
    911 
    912   SmallVector<SDValue, 12> StOps;
    913   SDValue N2;
    914   unsigned VecType;
    915 
    916   switch (N->getOpcode()) {
    917   case NVPTXISD::StoreV2:
    918     VecType = NVPTX::PTXLdStInstCode::V2;
    919     StOps.push_back(N->getOperand(1));
    920     StOps.push_back(N->getOperand(2));
    921     N2 = N->getOperand(3);
    922     break;
    923   case NVPTXISD::StoreV4:
    924     VecType = NVPTX::PTXLdStInstCode::V4;
    925     StOps.push_back(N->getOperand(1));
    926     StOps.push_back(N->getOperand(2));
    927     StOps.push_back(N->getOperand(3));
    928     StOps.push_back(N->getOperand(4));
    929     N2 = N->getOperand(5);
    930     break;
    931   default: return NULL;
    932   }
    933 
    934   StOps.push_back(getI32Imm(IsVolatile));
    935   StOps.push_back(getI32Imm(CodeAddrSpace));
    936   StOps.push_back(getI32Imm(VecType));
    937   StOps.push_back(getI32Imm(ToType));
    938   StOps.push_back(getI32Imm(ToTypeWidth));
    939 
    940   if (SelectDirectAddr(N2, Addr)) {
    941     switch (N->getOpcode()) {
    942     default: return NULL;
    943     case NVPTXISD::StoreV2:
    944       switch (EltVT.getSimpleVT().SimpleTy) {
    945       default: return NULL;
    946       case MVT::i8:   Opcode = NVPTX::STV_i8_v2_avar; break;
    947       case MVT::i16:  Opcode = NVPTX::STV_i16_v2_avar; break;
    948       case MVT::i32:  Opcode = NVPTX::STV_i32_v2_avar; break;
    949       case MVT::i64:  Opcode = NVPTX::STV_i64_v2_avar; break;
    950       case MVT::f32:  Opcode = NVPTX::STV_f32_v2_avar; break;
    951       case MVT::f64:  Opcode = NVPTX::STV_f64_v2_avar; break;
    952       }
    953       break;
    954     case NVPTXISD::StoreV4:
    955       switch (EltVT.getSimpleVT().SimpleTy) {
    956       default: return NULL;
    957       case MVT::i8:   Opcode = NVPTX::STV_i8_v4_avar; break;
    958       case MVT::i16:  Opcode = NVPTX::STV_i16_v4_avar; break;
    959       case MVT::i32:  Opcode = NVPTX::STV_i32_v4_avar; break;
    960       case MVT::f32:  Opcode = NVPTX::STV_f32_v4_avar; break;
    961       }
    962       break;
    963     }
    964     StOps.push_back(Addr);
    965   } else if (Subtarget.is64Bit()?
    966              SelectADDRsi64(N2.getNode(), N2, Base, Offset):
    967              SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    968     switch (N->getOpcode()) {
    969     default: return NULL;
    970     case NVPTXISD::StoreV2:
    971       switch (EltVT.getSimpleVT().SimpleTy) {
    972       default: return NULL;
    973       case MVT::i8:   Opcode = NVPTX::STV_i8_v2_asi; break;
    974       case MVT::i16:  Opcode = NVPTX::STV_i16_v2_asi; break;
    975       case MVT::i32:  Opcode = NVPTX::STV_i32_v2_asi; break;
    976       case MVT::i64:  Opcode = NVPTX::STV_i64_v2_asi; break;
    977       case MVT::f32:  Opcode = NVPTX::STV_f32_v2_asi; break;
    978       case MVT::f64:  Opcode = NVPTX::STV_f64_v2_asi; break;
    979       }
    980       break;
    981     case NVPTXISD::StoreV4:
    982       switch (EltVT.getSimpleVT().SimpleTy) {
    983       default: return NULL;
    984       case MVT::i8:   Opcode = NVPTX::STV_i8_v4_asi; break;
    985       case MVT::i16:  Opcode = NVPTX::STV_i16_v4_asi; break;
    986       case MVT::i32:  Opcode = NVPTX::STV_i32_v4_asi; break;
    987       case MVT::f32:  Opcode = NVPTX::STV_f32_v4_asi; break;
    988       }
    989       break;
    990     }
    991     StOps.push_back(Base);
    992     StOps.push_back(Offset);
    993   } else if (Subtarget.is64Bit()?
    994              SelectADDRri64(N2.getNode(), N2, Base, Offset):
    995              SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    996     if (Subtarget.is64Bit()) {
    997       switch (N->getOpcode()) {
    998       default: return NULL;
    999       case NVPTXISD::StoreV2:
   1000         switch (EltVT.getSimpleVT().SimpleTy) {
   1001         default: return NULL;
   1002         case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari_64; break;
   1003         case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari_64; break;
   1004         case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari_64; break;
   1005         case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari_64; break;
   1006         case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari_64; break;
   1007         case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari_64; break;
   1008         }
   1009         break;
   1010       case NVPTXISD::StoreV4:
   1011         switch (EltVT.getSimpleVT().SimpleTy) {
   1012         default: return NULL;
   1013         case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari_64; break;
   1014         case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari_64; break;
   1015         case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari_64; break;
   1016         case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari_64; break;
   1017         }
   1018         break;
   1019       }
   1020     } else {
   1021       switch (N->getOpcode()) {
   1022       default: return NULL;
   1023       case NVPTXISD::StoreV2:
   1024         switch (EltVT.getSimpleVT().SimpleTy) {
   1025         default: return NULL;
   1026         case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari; break;
   1027         case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari; break;
   1028         case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari; break;
   1029         case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari; break;
   1030         case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari; break;
   1031         case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari; break;
   1032         }
   1033         break;
   1034       case NVPTXISD::StoreV4:
   1035         switch (EltVT.getSimpleVT().SimpleTy) {
   1036         default: return NULL;
   1037         case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari; break;
   1038         case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari; break;
   1039         case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari; break;
   1040         case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari; break;
   1041         }
   1042         break;
   1043       }
   1044     }
   1045     StOps.push_back(Base);
   1046     StOps.push_back(Offset);
   1047   } else {
   1048     if (Subtarget.is64Bit()) {
   1049       switch (N->getOpcode()) {
   1050       default: return NULL;
   1051       case NVPTXISD::StoreV2:
   1052         switch (EltVT.getSimpleVT().SimpleTy) {
   1053         default: return NULL;
   1054         case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg_64; break;
   1055         case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg_64; break;
   1056         case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg_64; break;
   1057         case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg_64; break;
   1058         case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg_64; break;
   1059         case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg_64; break;
   1060         }
   1061         break;
   1062       case NVPTXISD::StoreV4:
   1063         switch (EltVT.getSimpleVT().SimpleTy) {
   1064         default: return NULL;
   1065         case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg_64; break;
   1066         case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg_64; break;
   1067         case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg_64; break;
   1068         case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg_64; break;
   1069         }
   1070         break;
   1071       }
   1072     } else {
   1073       switch (N->getOpcode()) {
   1074       default: return NULL;
   1075       case NVPTXISD::StoreV2:
   1076         switch (EltVT.getSimpleVT().SimpleTy) {
   1077         default: return NULL;
   1078         case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg; break;
   1079         case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg; break;
   1080         case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg; break;
   1081         case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg; break;
   1082         case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg; break;
   1083         case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg; break;
   1084         }
   1085         break;
   1086       case NVPTXISD::StoreV4:
   1087         switch (EltVT.getSimpleVT().SimpleTy) {
   1088         default: return NULL;
   1089         case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg; break;
   1090         case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg; break;
   1091         case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg; break;
   1092         case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg; break;
   1093         }
   1094         break;
   1095       }
   1096     }
   1097     StOps.push_back(N2);
   1098   }
   1099 
   1100   StOps.push_back(Chain);
   1101 
   1102   ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
   1103 
   1104   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   1105   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
   1106   cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
   1107 
   1108   return ST;
   1109 }
   1110 
   1111 // SelectDirectAddr - Match a direct address for DAG.
   1112 // A direct address could be a globaladdress or externalsymbol.
   1113 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   1114   // Return true if TGA or ES.
   1115   if (N.getOpcode() == ISD::TargetGlobalAddress
   1116       || N.getOpcode() == ISD::TargetExternalSymbol) {
   1117     Address = N;
   1118     return true;
   1119   }
   1120   if (N.getOpcode() == NVPTXISD::Wrapper) {
   1121     Address = N.getOperand(0);
   1122     return true;
   1123   }
   1124   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
   1125     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
   1126     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
   1127       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
   1128         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
   1129   }
   1130   return false;
   1131 }
   1132 
   1133 // symbol+offset
   1134 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
   1135                                          SDValue &Base, SDValue &Offset,
   1136                                          MVT mvt) {
   1137   if (Addr.getOpcode() == ISD::ADD) {
   1138     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   1139       SDValue base=Addr.getOperand(0);
   1140       if (SelectDirectAddr(base, Base)) {
   1141         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   1142         return true;
   1143       }
   1144     }
   1145   }
   1146   return false;
   1147 }
   1148 
   1149 // symbol+offset
   1150 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
   1151                                      SDValue &Base, SDValue &Offset) {
   1152   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
   1153 }
   1154 
   1155 // symbol+offset
   1156 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
   1157                                        SDValue &Base, SDValue &Offset) {
   1158   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
   1159 }
   1160 
   1161 // register+offset
   1162 bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
   1163                                          SDValue &Base, SDValue &Offset,
   1164                                          MVT mvt) {
   1165   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
   1166     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   1167     Offset = CurDAG->getTargetConstant(0, mvt);
   1168     return true;
   1169   }
   1170   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
   1171       Addr.getOpcode() == ISD::TargetGlobalAddress)
   1172     return false;  // direct calls.
   1173 
   1174   if (Addr.getOpcode() == ISD::ADD) {
   1175     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
   1176       return false;
   1177     }
   1178     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
   1179       if (FrameIndexSDNode *FIN =
   1180           dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
   1181         // Constant offset from frame ref.
   1182         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
   1183       else
   1184         Base = Addr.getOperand(0);
   1185       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
   1186       return true;
   1187     }
   1188   }
   1189   return false;
   1190 }
   1191 
   1192 // register+offset
   1193 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
   1194                                      SDValue &Base, SDValue &Offset) {
   1195   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
   1196 }
   1197 
   1198 // register+offset
   1199 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
   1200                                        SDValue &Base, SDValue &Offset) {
   1201   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
   1202 }
   1203 
   1204 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   1205                                                  unsigned int spN) const {
   1206   const Value *Src = NULL;
   1207   // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
   1208   // the classof() for MemSDNode does not include MemIntrinsicSDNode
   1209   // (See SelectionDAGNodes.h). So we need to check for both.
   1210   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
   1211     Src = mN->getSrcValue();
   1212   }
   1213   else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
   1214     Src = mN->getSrcValue();
   1215   }
   1216   if (!Src)
   1217     return false;
   1218   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
   1219     return (PT->getAddressSpace() == spN);
   1220   return false;
   1221 }
   1222 
   1223 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   1224 /// inline asm expressions.
   1225 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
   1226                                                      char ConstraintCode,
   1227                                                  std::vector<SDValue> &OutOps) {
   1228   SDValue Op0, Op1;
   1229   switch (ConstraintCode) {
   1230   default: return true;
   1231   case 'm':   // memory
   1232     if (SelectDirectAddr(Op, Op0)) {
   1233       OutOps.push_back(Op0);
   1234       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
   1235       return false;
   1236     }
   1237     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
   1238       OutOps.push_back(Op0);
   1239       OutOps.push_back(Op1);
   1240       return false;
   1241     }
   1242     break;
   1243   }
   1244   return true;
   1245 }
   1246 
   1247 // Return true if N is a undef or a constant.
   1248 // If N was undef, return a (i8imm 0) in Retval
   1249 // If N was imm, convert it to i8imm and return in Retval
   1250 // Note: The convert to i8imm is required, otherwise the
   1251 // pattern matcher inserts a bunch of IMOVi8rr to convert
   1252 // the imm to i8imm, and this causes instruction selection
   1253 // to fail.
   1254 bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
   1255                                    SDValue &Retval) {
   1256   if (!(N.getOpcode() == ISD::UNDEF) &&
   1257       !(N.getOpcode() == ISD::Constant))
   1258     return false;
   1259 
   1260   if (N.getOpcode() == ISD::UNDEF)
   1261     Retval = CurDAG->getTargetConstant(0, MVT::i8);
   1262   else {
   1263     ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
   1264     unsigned retval = cn->getZExtValue();
   1265     Retval = CurDAG->getTargetConstant(retval, MVT::i8);
   1266   }
   1267   return true;
   1268 }
   1269