Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 
     15 #include "llvm/Instructions.h"
     16 #include "llvm/Support/raw_ostream.h"
     17 #include "NVPTXISelDAGToDAG.h"
     18 #include "llvm/Support/Debug.h"
     19 #include "llvm/Support/ErrorHandling.h"
     20 #include "llvm/Support/CommandLine.h"
     21 #include "llvm/Target/TargetIntrinsicInfo.h"
     22 #include "llvm/GlobalValue.h"
     23 
     24 #undef DEBUG_TYPE
     25 #define DEBUG_TYPE "nvptx-isel"
     26 
     27 using namespace llvm;
     28 
     29 
     30 static cl::opt<bool>
     31 UseFMADInstruction("nvptx-mad-enable",
     32                    cl::ZeroOrMore,
     33                 cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
     34                    cl::init(false));
     35 
     36 static cl::opt<int>
     37 FMAContractLevel("nvptx-fma-level",
     38                  cl::ZeroOrMore,
     39                  cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
     40                      " 1: do it  2: do it aggressively"),
     41                      cl::init(2));
     42 
     43 
     44 static cl::opt<int>
     45 UsePrecDivF32("nvptx-prec-divf32",
     46               cl::ZeroOrMore,
     47              cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
     48                   " IEEE Compliant F32 div.rnd if avaiable."),
     49                   cl::init(2));
     50 
     51 /// createNVPTXISelDag - This pass converts a legalized DAG into a
     52 /// NVPTX-specific DAG, ready for instruction scheduling.
     53 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
     54                                        llvm::CodeGenOpt::Level OptLevel) {
     55   return new NVPTXDAGToDAGISel(TM, OptLevel);
     56 }
     57 
     58 
     59 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
     60                                      CodeGenOpt::Level OptLevel)
     61 : SelectionDAGISel(tm, OptLevel),
     62   Subtarget(tm.getSubtarget<NVPTXSubtarget>())
     63 {
     64   // Always do fma.f32 fpcontract if the target supports the instruction.
     65   // Always do fma.f64 fpcontract if the target supports the instruction.
     66   // Do mad.f32 is nvptx-mad-enable is specified and the target does not
     67   // support fma.f32.
     68 
     69   doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
     70   doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
     71       (FMAContractLevel>=1);
     72   doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
     73       (FMAContractLevel>=1);
     74   doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
     75       (FMAContractLevel==2);
     76   doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
     77       (FMAContractLevel==2);
     78 
     79   allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
     80 
     81   UseF32FTZ = false;
     82 
     83   doMulWide = (OptLevel > 0);
     84 
     85   // Decide how to translate f32 div
     86   do_DIVF32_PREC = UsePrecDivF32;
     87   // sm less than sm_20 does not support div.rnd. Use div.full.
     88   if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
     89     do_DIVF32_PREC = 1;
     90 
     91 }
     92 
     93 /// Select - Select instructions not customized! Used for
     94 /// expanded, promoted and normal instructions.
     95 SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
     96 
     97   if (N->isMachineOpcode())
     98     return NULL;   // Already selected.
     99 
    100   SDNode *ResNode = NULL;
    101   switch (N->getOpcode()) {
    102   case ISD::LOAD:
    103     ResNode = SelectLoad(N);
    104     break;
    105   case ISD::STORE:
    106     ResNode = SelectStore(N);
    107     break;
    108   }
    109   if (ResNode)
    110     return ResNode;
    111   return SelectCode(N);
    112 }
    113 
    114 
    115 static unsigned int
    116 getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
    117 {
    118   const Value *Src = N->getSrcValue();
    119   if (!Src)
    120     return NVPTX::PTXLdStInstCode::LOCAL;
    121 
    122   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    123     switch (PT->getAddressSpace()) {
    124     case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    125     case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    126     case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    127     case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
    128       return NVPTX::PTXLdStInstCode::CONSTANT;
    129     case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    130     case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    131     case llvm::ADDRESS_SPACE_CONST:
    132       // If the arch supports generic address space, translate it to GLOBAL
    133       // for correctness.
    134       // If the arch does not support generic address space, then the arch
    135       // does not really support ADDRESS_SPACE_CONST, translate it to
    136       // to CONSTANT for better performance.
    137       if (Subtarget.hasGenericLdSt())
    138         return NVPTX::PTXLdStInstCode::GLOBAL;
    139       else
    140         return NVPTX::PTXLdStInstCode::CONSTANT;
    141     default: break;
    142     }
    143   }
    144   return NVPTX::PTXLdStInstCode::LOCAL;
    145 }
    146 
    147 
    148 SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
    149   DebugLoc dl = N->getDebugLoc();
    150   LoadSDNode *LD = cast<LoadSDNode>(N);
    151   EVT LoadedVT = LD->getMemoryVT();
    152   SDNode *NVPTXLD= NULL;
    153 
    154   // do not support pre/post inc/dec
    155   if (LD->isIndexed())
    156     return NULL;
    157 
    158   if (!LoadedVT.isSimple())
    159     return NULL;
    160 
    161   // Address Space Setting
    162   unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
    163 
    164   // Volatile Setting
    165   // - .volatile is only availalble for .global and .shared
    166   bool isVolatile = LD->isVolatile();
    167   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    168       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    169       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    170     isVolatile = false;
    171 
    172   // Vector Setting
    173   MVT SimpleVT = LoadedVT.getSimpleVT();
    174   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    175   if (SimpleVT.isVector()) {
    176     unsigned num = SimpleVT.getVectorNumElements();
    177     if (num == 2)
    178       vecType = NVPTX::PTXLdStInstCode::V2;
    179     else if (num == 4)
    180       vecType = NVPTX::PTXLdStInstCode::V4;
    181     else
    182       return NULL;
    183   }
    184 
    185   // Type Setting: fromType + fromTypeWidth
    186   //
    187   // Sign   : ISD::SEXTLOAD
    188   // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
    189   //          type is integer
    190   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
    191   MVT ScalarVT = SimpleVT.getScalarType();
    192   unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
    193   unsigned int fromType;
    194   if ((LD->getExtensionType() == ISD::SEXTLOAD))
    195     fromType = NVPTX::PTXLdStInstCode::Signed;
    196   else if (ScalarVT.isFloatingPoint())
    197     fromType = NVPTX::PTXLdStInstCode::Float;
    198   else
    199     fromType = NVPTX::PTXLdStInstCode::Unsigned;
    200 
    201   // Create the machine instruction DAG
    202   SDValue Chain = N->getOperand(0);
    203   SDValue N1 = N->getOperand(1);
    204   SDValue Addr;
    205   SDValue Offset, Base;
    206   unsigned Opcode;
    207   MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
    208 
    209   if (SelectDirectAddr(N1, Addr)) {
    210     switch (TargetVT) {
    211     case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
    212     case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
    213     case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
    214     case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
    215     case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
    216     case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
    217     case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_avar; break;
    218     case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
    219     case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
    220     case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
    221     case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
    222     case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
    223     case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_avar; break;
    224     case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
    225     case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
    226     case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
    227     default: return NULL;
    228     }
    229     SDValue Ops[] = { getI32Imm(isVolatile),
    230                       getI32Imm(codeAddrSpace),
    231                       getI32Imm(vecType),
    232                       getI32Imm(fromType),
    233                       getI32Imm(fromTypeWidth),
    234                       Addr, Chain };
    235     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    236                                      MVT::Other, Ops, 7);
    237   } else if (Subtarget.is64Bit()?
    238       SelectADDRsi64(N1.getNode(), N1, Base, Offset):
    239       SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    240     switch (TargetVT) {
    241     case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
    242     case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
    243     case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
    244     case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
    245     case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
    246     case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
    247     case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_asi; break;
    248     case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
    249     case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
    250     case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
    251     case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
    252     case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
    253     case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_asi; break;
    254     case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
    255     case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
    256     case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
    257     default: return NULL;
    258     }
    259     SDValue Ops[] = { getI32Imm(isVolatile),
    260                       getI32Imm(codeAddrSpace),
    261                       getI32Imm(vecType),
    262                       getI32Imm(fromType),
    263                       getI32Imm(fromTypeWidth),
    264                       Base, Offset, Chain };
    265     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    266                                      MVT::Other, Ops, 8);
    267   } else if (Subtarget.is64Bit()?
    268       SelectADDRri64(N1.getNode(), N1, Base, Offset):
    269       SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    270     switch (TargetVT) {
    271     case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
    272     case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
    273     case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
    274     case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
    275     case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
    276     case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
    277     case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
    278     case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
    279     case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
    280     case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
    281     case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
    282     case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
    283     case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
    284     case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
    285     case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
    286     case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
    287     default: return NULL;
    288     }
    289     SDValue Ops[] = { getI32Imm(isVolatile),
    290                       getI32Imm(codeAddrSpace),
    291                       getI32Imm(vecType),
    292                       getI32Imm(fromType),
    293                       getI32Imm(fromTypeWidth),
    294                       Base, Offset, Chain };
    295     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    296                                      MVT::Other, Ops, 8);
    297   }
    298   else {
    299     switch (TargetVT) {
    300     case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
    301     case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
    302     case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
    303     case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
    304     case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
    305     case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
    306     case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
    307     case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
    308     case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
    309     case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
    310     case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
    311     case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
    312     case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
    313     case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
    314     case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
    315     case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
    316     default: return NULL;
    317     }
    318     SDValue Ops[] = { getI32Imm(isVolatile),
    319                       getI32Imm(codeAddrSpace),
    320                       getI32Imm(vecType),
    321                       getI32Imm(fromType),
    322                       getI32Imm(fromTypeWidth),
    323                       N1, Chain };
    324     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
    325                                      MVT::Other, Ops, 7);
    326   }
    327 
    328   if (NVPTXLD != NULL) {
    329     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    330     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    331     cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
    332   }
    333 
    334   return NVPTXLD;
    335 }
    336 
    337 SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
    338   DebugLoc dl = N->getDebugLoc();
    339   StoreSDNode *ST = cast<StoreSDNode>(N);
    340   EVT StoreVT = ST->getMemoryVT();
    341   SDNode *NVPTXST = NULL;
    342 
    343   // do not support pre/post inc/dec
    344   if (ST->isIndexed())
    345     return NULL;
    346 
    347   if (!StoreVT.isSimple())
    348     return NULL;
    349 
    350   // Address Space Setting
    351   unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
    352 
    353   // Volatile Setting
    354   // - .volatile is only availalble for .global and .shared
    355   bool isVolatile = ST->isVolatile();
    356   if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
    357       codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
    358       codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    359     isVolatile = false;
    360 
    361   // Vector Setting
    362   MVT SimpleVT = StoreVT.getSimpleVT();
    363   unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
    364   if (SimpleVT.isVector()) {
    365     unsigned num = SimpleVT.getVectorNumElements();
    366     if (num == 2)
    367       vecType = NVPTX::PTXLdStInstCode::V2;
    368     else if (num == 4)
    369       vecType = NVPTX::PTXLdStInstCode::V4;
    370     else
    371       return NULL;
    372   }
    373 
    374   // Type Setting: toType + toTypeWidth
    375   // - for integer type, always use 'u'
    376   //
    377   MVT ScalarVT = SimpleVT.getScalarType();
    378   unsigned toTypeWidth =  ScalarVT.getSizeInBits();
    379   unsigned int toType;
    380   if (ScalarVT.isFloatingPoint())
    381     toType = NVPTX::PTXLdStInstCode::Float;
    382   else
    383     toType = NVPTX::PTXLdStInstCode::Unsigned;
    384 
    385   // Create the machine instruction DAG
    386   SDValue Chain = N->getOperand(0);
    387   SDValue N1 = N->getOperand(1);
    388   SDValue N2 = N->getOperand(2);
    389   SDValue Addr;
    390   SDValue Offset, Base;
    391   unsigned Opcode;
    392   MVT::SimpleValueType SourceVT =
    393       N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
    394 
    395   if (SelectDirectAddr(N2, Addr)) {
    396     switch (SourceVT) {
    397     case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
    398     case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
    399     case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
    400     case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
    401     case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
    402     case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
    403     case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
    404     case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
    405     case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
    406     case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
    407     case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
    408     case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
    409     case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
    410     case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
    411     case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
    412     case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
    413     default: return NULL;
    414     }
    415     SDValue Ops[] = { N1,
    416                       getI32Imm(isVolatile),
    417                       getI32Imm(codeAddrSpace),
    418                       getI32Imm(vecType),
    419                       getI32Imm(toType),
    420                       getI32Imm(toTypeWidth),
    421                       Addr, Chain };
    422     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    423                                      MVT::Other, Ops, 8);
    424   } else if (Subtarget.is64Bit()?
    425       SelectADDRsi64(N2.getNode(), N2, Base, Offset):
    426       SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    427     switch (SourceVT) {
    428     case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
    429     case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
    430     case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
    431     case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
    432     case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
    433     case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
    434     case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
    435     case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
    436     case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
    437     case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
    438     case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
    439     case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
    440     case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
    441     case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
    442     case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
    443     case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
    444     default: return NULL;
    445     }
    446     SDValue Ops[] = { N1,
    447                       getI32Imm(isVolatile),
    448                       getI32Imm(codeAddrSpace),
    449                       getI32Imm(vecType),
    450                       getI32Imm(toType),
    451                       getI32Imm(toTypeWidth),
    452                       Base, Offset, Chain };
    453     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    454                                      MVT::Other, Ops, 9);
    455   } else if (Subtarget.is64Bit()?
    456       SelectADDRri64(N2.getNode(), N2, Base, Offset):
    457       SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    458     switch (SourceVT) {
    459     case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
    460     case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
    461     case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
    462     case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
    463     case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
    464     case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
    465     case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
    466     case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
    467     case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
    468     case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
    469     case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
    470     case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
    471     case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
    472     case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
    473     case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
    474     case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
    475     default: return NULL;
    476     }
    477     SDValue Ops[] = { N1,
    478                       getI32Imm(isVolatile),
    479                       getI32Imm(codeAddrSpace),
    480                       getI32Imm(vecType),
    481                       getI32Imm(toType),
    482                       getI32Imm(toTypeWidth),
    483                       Base, Offset, Chain };
    484     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    485                                      MVT::Other, Ops, 9);
    486   } else {
    487     switch (SourceVT) {
    488     case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
    489     case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
    490     case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
    491     case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
    492     case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
    493     case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
    494     case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
    495     case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
    496     case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
    497     case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
    498     case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
    499     case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
    500     case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
    501     case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
    502     case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
    503     case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
    504     default: return NULL;
    505     }
    506     SDValue Ops[] = { N1,
    507                       getI32Imm(isVolatile),
    508                       getI32Imm(codeAddrSpace),
    509                       getI32Imm(vecType),
    510                       getI32Imm(toType),
    511                       getI32Imm(toTypeWidth),
    512                       N2, Chain };
    513     NVPTXST = CurDAG->getMachineNode(Opcode, dl,
    514                                      MVT::Other, Ops, 8);
    515   }
    516 
    517   if (NVPTXST != NULL) {
    518     MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    519     MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    520     cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
    521   }
    522 
    523   return NVPTXST;
    524 }
    525 
    526 // SelectDirectAddr - Match a direct address for DAG.
    527 // A direct address could be a globaladdress or externalsymbol.
    528 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
    529   // Return true if TGA or ES.
    530   if (N.getOpcode() == ISD::TargetGlobalAddress
    531       || N.getOpcode() == ISD::TargetExternalSymbol) {
    532     Address = N;
    533     return true;
    534   }
    535   if (N.getOpcode() == NVPTXISD::Wrapper) {
    536     Address = N.getOperand(0);
    537     return true;
    538   }
    539   if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
    540     unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
    541     if (IID == Intrinsic::nvvm_ptr_gen_to_param)
    542       if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
    543         return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
    544   }
    545   return false;
    546 }
    547 
    548 // symbol+offset
    549 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
    550                                          SDValue &Base, SDValue &Offset,
    551                                          MVT mvt) {
    552   if (Addr.getOpcode() == ISD::ADD) {
    553     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
    554       SDValue base=Addr.getOperand(0);
    555       if (SelectDirectAddr(base, Base)) {
    556         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
    557         return true;
    558       }
    559     }
    560   }
    561   return false;
    562 }
    563 
    564 // symbol+offset
    565 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
    566                                      SDValue &Base, SDValue &Offset) {
    567   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
    568 }
    569 
    570 // symbol+offset
    571 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
    572                                        SDValue &Base, SDValue &Offset) {
    573   return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
    574 }
    575 
    576 // register+offset
    577 bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
    578                                          SDValue &Base, SDValue &Offset,
    579                                          MVT mvt) {
    580   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    581     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    582     Offset = CurDAG->getTargetConstant(0, mvt);
    583     return true;
    584   }
    585   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
    586       Addr.getOpcode() == ISD::TargetGlobalAddress)
    587     return false;  // direct calls.
    588 
    589   if (Addr.getOpcode() == ISD::ADD) {
    590     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
    591       return false;
    592     }
    593     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
    594       if (FrameIndexSDNode *FIN =
    595           dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
    596         // Constant offset from frame ref.
    597         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    598       else
    599         Base = Addr.getOperand(0);
    600       Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
    601       return true;
    602     }
    603   }
    604   return false;
    605 }
    606 
    607 // register+offset
    608 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
    609                                      SDValue &Base, SDValue &Offset) {
    610   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
    611 }
    612 
    613 // register+offset
    614 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
    615                                        SDValue &Base, SDValue &Offset) {
    616   return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
    617 }
    618 
    619 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
    620                                                  unsigned int spN) const {
    621   const Value *Src = NULL;
    622   // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
    623   // the classof() for MemSDNode does not include MemIntrinsicSDNode
    624   // (See SelectionDAGNodes.h). So we need to check for both.
    625   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    626     Src = mN->getSrcValue();
    627   }
    628   else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
    629     Src = mN->getSrcValue();
    630   }
    631   if (!Src)
    632     return false;
    633   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    634     return (PT->getAddressSpace() == spN);
    635   return false;
    636 }
    637 
    638 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    639 /// inline asm expressions.
    640 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
    641                                                      char ConstraintCode,
    642                                                  std::vector<SDValue> &OutOps) {
    643   SDValue Op0, Op1;
    644   switch (ConstraintCode) {
    645   default: return true;
    646   case 'm':   // memory
    647     if (SelectDirectAddr(Op, Op0)) {
    648       OutOps.push_back(Op0);
    649       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
    650       return false;
    651     }
    652     if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
    653       OutOps.push_back(Op0);
    654       OutOps.push_back(Op1);
    655       return false;
    656     }
    657     break;
    658   }
    659   return true;
    660 }
    661 
    662 // Return true if N is a undef or a constant.
    663 // If N was undef, return a (i8imm 0) in Retval
    664 // If N was imm, convert it to i8imm and return in Retval
    665 // Note: The convert to i8imm is required, otherwise the
    666 // pattern matcher inserts a bunch of IMOVi8rr to convert
    667 // the imm to i8imm, and this causes instruction selection
    668 // to fail.
    669 bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
    670                                    SDValue &Retval) {
    671   if (!(N.getOpcode() == ISD::UNDEF) &&
    672       !(N.getOpcode() == ISD::Constant))
    673     return false;
    674 
    675   if (N.getOpcode() == ISD::UNDEF)
    676     Retval = CurDAG->getTargetConstant(0, MVT::i8);
    677   else {
    678     ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
    679     unsigned retval = cn->getZExtValue();
    680     Retval = CurDAG->getTargetConstant(retval, MVT::i8);
    681   }
    682   return true;
    683 }
    684