Home | History | Annotate | Download | only in NVPTX
      1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file describes the PTX instructions in TableGen format.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 include "NVPTXInstrFormats.td"
     15 
     16 // A NOP instruction
     17 def NOP : NVPTXInst<(outs), (ins), "", []>;
     18 
     19 // List of vector specific properties
     20 def isVecLD      : VecInstTypeEnum<1>;
     21 def isVecST      : VecInstTypeEnum<2>;
     22 def isVecBuild   : VecInstTypeEnum<3>;
     23 def isVecShuffle : VecInstTypeEnum<4>;
     24 def isVecExtract : VecInstTypeEnum<5>;
     25 def isVecInsert  : VecInstTypeEnum<6>;
     26 def isVecDest    : VecInstTypeEnum<7>;
     27 def isVecOther   : VecInstTypeEnum<15>;
     28 
     29 //===----------------------------------------------------------------------===//
     30 // NVPTX Operand Definitions.
     31 //===----------------------------------------------------------------------===//
     32 
     33 def brtarget    : Operand<OtherVT>;
     34 
     35 //===----------------------------------------------------------------------===//
     36 // NVPTX Instruction Predicate Definitions
     37 //===----------------------------------------------------------------------===//
     38 
     39 
     40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
     41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
     42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
     43 def useAtomRedG32forGen32 :
     44   Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
     45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
     46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
     47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
     48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
     49 def useAtomRedG64forGen64 :
     50   Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
     51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
     52 def hasVote : Predicate<"Subtarget.hasVote()">;
     53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
     54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
     55 def hasLDG : Predicate<"Subtarget.hasLDG()">;
     56 def hasLDU : Predicate<"Subtarget.hasLDU()">;
     57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
     58 
     59 def doF32FTZ : Predicate<"UseF32FTZ">;
     60 
     61 def doFMAF32      : Predicate<"doFMAF32">;
     62 def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;
     63 def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
     64 def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
     65 def doFMAF64      : Predicate<"doFMAF64">;
     66 def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
     67 def doFMADF32     : Predicate<"doFMADF32">;
     68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
     69 
     70 def doMulWide      : Predicate<"doMulWide">;
     71 
     72 def allowFMA : Predicate<"allowFMA">;
     73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
     74 
     75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
     76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
     77 
     78 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
     79 
     80 def true : Predicate<"1">;
     81 
     82 //===----------------------------------------------------------------------===//
     83 // Special Handling for 8-bit Operands and Operations
     84 //
     85 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
     86 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
     87 // 8-bit registers.
     88 //
     89 // PTX ld, st and cvt instructions permit source and destination data operands
     90 // to be wider than the instruction-type size, so that narrow values may be
     91 // loaded, stored, and converted using regular-width registers.
     92 //
     93 // So in PTX generation, we
     94 // - always use 16-bit registers in place in 8-bit registers.
     95 //   (8-bit variables should stay as 8-bit as they represent memory layout.)
     96 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
     97 //   before operation
     98 //   . div
     99 //   . rem
    100 //   . neg (sign)
    101 //   . set, setp
    102 //   . shr
    103 //
    104 // We are patching the operations by inserting the cvt instructions in the
    105 // asm strings of the affected instructions.
    106 //
    107 // Since vector operations, except for ld/st, are eventually elementized. We
    108 // do not need to special-hand the vector 8-bit operations.
    109 //
    110 //
    111 //===----------------------------------------------------------------------===//
    112 
    113 // Generate string block like
    114 // {
    115 //   .reg .s16 %temp1;
    116 //   .reg .s16 %temp2;
    117 //   cvt.s16.s8 %temp1, %a;
    118 //   cvt.s16.s8 %temp2, %b;
    119 //   opc.s16    %dst, %temp1, %temp2;
    120 // }
    121 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
    122 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
    123   string s = !strconcat("{{\n\t",
    124              !strconcat(".reg .", !strconcat(TypeStr,
    125              !strconcat(" \t%temp1;\n\t",
    126              !strconcat(".reg .", !strconcat(TypeStr,
    127              !strconcat(" \t%temp2;\n\t",
    128              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
    129              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
    130              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
    131 }
    132 
    133 // Generate string block like
    134 // {
    135 //   .reg .s16 %temp1;
    136 //   .reg .s16 %temp2;
    137 //   cvt.s16.s8 %temp1, %a;
    138 //   mov.b16    %temp2, %b;
    139 //   cvt.s16.s8 %temp2, %temp2;
    140 //   opc.s16    %dst, %temp1, %temp2;
    141 // }
    142 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
    143 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
    144   string s = !strconcat("{{\n\t",
    145              !strconcat(".reg .", !strconcat(TypeStr,
    146              !strconcat(" \t%temp1;\n\t",
    147              !strconcat(".reg .",
    148              !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
    149              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
    150              !strconcat("mov.b16 \t%temp2, $b;\n\t",
    151              !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
    152              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
    153 }
    154 
    155 // Generate string block like
    156 // {
    157 //   .reg .s16 %temp1;
    158 //   .reg .s16 %temp2;
    159 //   mov.b16    %temp1, %b;
    160 //   cvt.s16.s8 %temp1, %temp1;
    161 //   cvt.s16.s8 %temp2, %a;
    162 //   opc.s16    %dst, %temp1, %temp2;
    163 // }
    164 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
    165 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
    166   string s = !strconcat("{{\n\t",
    167              !strconcat(".reg .", !strconcat(TypeStr,
    168              !strconcat(" \t%temp1;\n\t",
    169              !strconcat(".reg .", !strconcat(TypeStr,
    170              !strconcat(" \t%temp2;\n\t",
    171              !strconcat("mov.b16 \t%temp1, $a;\n\t",
    172              !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
    173              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
    174              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
    175 }
    176 
    177 
    178 //===----------------------------------------------------------------------===//
    179 // Some Common Instruction Class Templates
    180 //===----------------------------------------------------------------------===//
    181 
    182 multiclass I3<string OpcStr, SDNode OpNode> {
    183   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
    184                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    185                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
    186                        Int64Regs:$b))]>;
    187   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
    188                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    189                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
    190   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
    191                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    192                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
    193                        Int32Regs:$b))]>;
    194   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
    195                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    196                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
    197   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
    198                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    199                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
    200                        Int16Regs:$b))]>;
    201   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
    202                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    203                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
    204   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
    205                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    206                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
    207   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
    208                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    209                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
    210 }
    211 
    212 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
    213   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
    214                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    215                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
    216                        Int64Regs:$b))]>;
    217   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
    218                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    219                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
    220   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
    221                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    222                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
    223                        Int32Regs:$b))]>;
    224   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
    225                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    226                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
    227   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
    228                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    229                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
    230                        Int16Regs:$b))]>;
    231   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
    232                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    233                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
    234   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
    235                      Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
    236                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
    237   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
    238                      Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
    239                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
    240 }
    241 
    242 multiclass I3_noi8<string OpcStr, SDNode OpNode> {
    243   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
    244                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    245                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
    246                        Int64Regs:$b))]>;
    247   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
    248                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
    249                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
    250   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
    251                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    252                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
    253                        Int32Regs:$b))]>;
    254   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
    255                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
    256                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
    257   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
    258                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    259                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
    260                        Int16Regs:$b))]>;
    261   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
    262                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
    263                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
    264 }
    265 
    266 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
    267    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
    268        Int32Regs:$b),
    269                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
    270                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
    271                         Int32Regs:$b))]>;
    272    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
    273                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
    274                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
    275 }
    276 
    277 multiclass F3<string OpcStr, SDNode OpNode> {
    278    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
    279                       (ins Float64Regs:$a, Float64Regs:$b),
    280                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
    281                       [(set Float64Regs:$dst,
    282                         (OpNode Float64Regs:$a, Float64Regs:$b))]>,
    283                       Requires<[allowFMA]>;
    284    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
    285                       (ins Float64Regs:$a, f64imm:$b),
    286                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
    287                       [(set Float64Regs:$dst,
    288                         (OpNode Float64Regs:$a, fpimm:$b))]>,
    289                       Requires<[allowFMA]>;
    290    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
    291                       (ins Float32Regs:$a, Float32Regs:$b),
    292                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
    293                       [(set Float32Regs:$dst,
    294                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    295                       Requires<[allowFMA_ftz]>;
    296    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
    297                       (ins Float32Regs:$a, f32imm:$b),
    298                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
    299                       [(set Float32Regs:$dst,
    300                         (OpNode Float32Regs:$a, fpimm:$b))]>,
    301                       Requires<[allowFMA_ftz]>;
    302    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
    303                       (ins Float32Regs:$a, Float32Regs:$b),
    304                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
    305                       [(set Float32Regs:$dst,
    306                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    307                       Requires<[allowFMA]>;
    308    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
    309                       (ins Float32Regs:$a, f32imm:$b),
    310                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
    311                       [(set Float32Regs:$dst,
    312                         (OpNode Float32Regs:$a, fpimm:$b))]>,
    313                       Requires<[allowFMA]>;
    314 }
    315 
    316 multiclass F3_rn<string OpcStr, SDNode OpNode> {
    317    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
    318                       (ins Float64Regs:$a, Float64Regs:$b),
    319                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
    320                       [(set Float64Regs:$dst,
    321                         (OpNode Float64Regs:$a, Float64Regs:$b))]>;
    322    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
    323                       (ins Float64Regs:$a, f64imm:$b),
    324                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
    325                       [(set Float64Regs:$dst,
    326                         (OpNode Float64Regs:$a, fpimm:$b))]>;
    327    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
    328                       (ins Float32Regs:$a, Float32Regs:$b),
    329                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
    330                       [(set Float32Regs:$dst,
    331                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    332                       Requires<[doF32FTZ]>;
    333    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
    334                       (ins Float32Regs:$a, f32imm:$b),
    335                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
    336                       [(set Float32Regs:$dst,
    337                         (OpNode Float32Regs:$a, fpimm:$b))]>,
    338                       Requires<[doF32FTZ]>;
    339    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
    340                       (ins Float32Regs:$a, Float32Regs:$b),
    341                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
    342                       [(set Float32Regs:$dst,
    343                         (OpNode Float32Regs:$a, Float32Regs:$b))]>;
    344    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
    345                       (ins Float32Regs:$a, f32imm:$b),
    346                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
    347                       [(set Float32Regs:$dst,
    348                         (OpNode Float32Regs:$a, fpimm:$b))]>;
    349 }
    350 
    351 multiclass F2<string OpcStr, SDNode OpNode> {
    352    def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
    353                       !strconcat(OpcStr, ".f64 \t$dst, $a;"),
    354                       [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
    355    def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
    356                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
    357                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
    358                       Requires<[doF32FTZ]>;
    359    def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
    360                       !strconcat(OpcStr, ".f32 \t$dst, $a;"),
    361                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
    362 }
    363 
    364 //===----------------------------------------------------------------------===//
    365 // NVPTX Instructions.
    366 //===----------------------------------------------------------------------===//
    367 
    368 //-----------------------------------
    369 // Integer Arithmetic
    370 //-----------------------------------
    371 
    372 multiclass ADD_SUB_i1<SDNode OpNode> {
    373    def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
    374           "xor.pred \t$dst, $a, $b;",
    375       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
    376    def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
    377           "xor.pred \t$dst, $a, $b;",
    378       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
    379 }
    380 
    381 defm ADD_i1 : ADD_SUB_i1<add>;
    382 defm SUB_i1 : ADD_SUB_i1<sub>;
    383 
    384 
    385 defm ADD : I3<"add.s", add>;
    386 defm SUB : I3<"sub.s", sub>;
    387 
    388 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
    389 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
    390 
    391 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
    392 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
    393 
    394 //mul.wide PTX instruction
    395 def SInt32Const : PatLeaf<(imm), [{
    396   const APInt &v = N->getAPIntValue();
    397   if (v.isSignedIntN(32))
    398     return true;
    399   return false;
    400 }]>;
    401 
    402 def UInt32Const : PatLeaf<(imm), [{
    403   const APInt &v = N->getAPIntValue();
    404   if (v.isIntN(32))
    405     return true;
    406   return false;
    407 }]>;
    408 
    409 def SInt16Const : PatLeaf<(imm), [{
    410   const APInt &v = N->getAPIntValue();
    411   if (v.isSignedIntN(16))
    412     return true;
    413   return false;
    414 }]>;
    415 
    416 def UInt16Const : PatLeaf<(imm), [{
    417   const APInt &v = N->getAPIntValue();
    418   if (v.isIntN(16))
    419     return true;
    420   return false;
    421 }]>;
    422 
    423 def Int5Const : PatLeaf<(imm), [{
    424   const APInt &v = N->getAPIntValue();
    425   // Check if 0 <= v < 32
    426   // Only then the result from (x << v) will be i32
    427   if (v.sge(0) && v.slt(32))
    428     return true;
    429   return false;
    430 }]>;
    431 
    432 def Int4Const : PatLeaf<(imm), [{
    433   const APInt &v = N->getAPIntValue();
    434   // Check if 0 <= v < 16
    435   // Only then the result from (x << v) will be i16
    436   if (v.sge(0) && v.slt(16))
    437     return true;
    438   return false;
    439 }]>;
    440 
    441 def SHL2MUL32 : SDNodeXForm<imm, [{
    442   const APInt &v = N->getAPIntValue();
    443   APInt temp(32, 1);
    444   return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
    445 }]>;
    446 
    447 def SHL2MUL16 : SDNodeXForm<imm, [{
    448   const APInt &v = N->getAPIntValue();
    449   APInt temp(16, 1);
    450   return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
    451 }]>;
    452 
    453 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
    454                            (ins Int32Regs:$a, Int32Regs:$b),
    455                            "mul.wide.s32 \t$dst, $a, $b;", []>;
    456 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
    457                             (ins Int32Regs:$a, i64imm:$b),
    458                            "mul.wide.s32 \t$dst, $a, $b;", []>;
    459 
    460 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
    461                            (ins Int32Regs:$a, Int32Regs:$b),
    462                            "mul.wide.u32 \t$dst, $a, $b;", []>;
    463 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
    464                             (ins Int32Regs:$a, i64imm:$b),
    465                            "mul.wide.u32 \t$dst, $a, $b;", []>;
    466 
    467 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
    468                             (ins Int16Regs:$a, Int16Regs:$b),
    469                            "mul.wide.s16 \t$dst, $a, $b;", []>;
    470 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
    471                             (ins Int16Regs:$a, i32imm:$b),
    472                            "mul.wide.s16 \t$dst, $a, $b;", []>;
    473 
    474 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
    475                             (ins Int16Regs:$a, Int16Regs:$b),
    476                            "mul.wide.u16 \t$dst, $a, $b;", []>;
    477 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
    478                             (ins Int16Regs:$a, i32imm:$b),
    479                            "mul.wide.u16 \t$dst, $a, $b;", []>;
    480 
    481 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
    482           (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
    483           Requires<[doMulWide]>;
    484 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
    485           (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
    486           Requires<[doMulWide]>;
    487 
    488 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
    489           (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
    490           Requires<[doMulWide]>;
    491 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
    492           (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
    493           Requires<[doMulWide]>;
    494 
    495 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
    496           (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
    497           Requires<[doMulWide]>;
    498 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
    499           (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
    500           Requires<[doMulWide]>;
    501 
    502 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
    503           (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
    504 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
    505           (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
    506           Requires<[doMulWide]>;
    507 
    508 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
    509           (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
    510 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
    511           (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
    512           Requires<[doMulWide]>;
    513 
    514 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
    515           (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
    516 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
    517           (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
    518           Requires<[doMulWide]>;
    519 
    520 defm MULT : I3<"mul.lo.s", mul>;
    521 
    522 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
    523 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
    524 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
    525             !strconcat("{{ \n\t",
    526             !strconcat(".reg \t.s16 temp1; \n\t",
    527             !strconcat(".reg \t.s16 temp2; \n\t",
    528             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
    529             !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
    530             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
    531             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
    532             !strconcat("}}", "")))))))),
    533       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
    534 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
    535             !strconcat("{{ \n\t",
    536             !strconcat(".reg \t.s16 temp1; \n\t",
    537             !strconcat(".reg \t.s16 temp2; \n\t",
    538             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
    539             !strconcat("mov.b16 \ttemp2, $b; \n\t",
    540             !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
    541             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
    542             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
    543             !strconcat("}}", ""))))))))),
    544       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
    545 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
    546             !strconcat("{{ \n\t",
    547             !strconcat(".reg \t.u16 temp1; \n\t",
    548             !strconcat(".reg \t.u16 temp2; \n\t",
    549             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
    550             !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
    551             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
    552             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
    553             !strconcat("}}", "")))))))),
    554       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
    555 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
    556             !strconcat("{{ \n\t",
    557             !strconcat(".reg \t.u16 temp1; \n\t",
    558             !strconcat(".reg \t.u16 temp2; \n\t",
    559             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
    560             !strconcat("mov.b16 \ttemp2, $b; \n\t",
    561             !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
    562             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
    563             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
    564             !strconcat("}}", ""))))))))),
    565       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
    566 
    567 
    568 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
    569 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
    570 
    571 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
    572 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
    573 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
    574 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
    575 
    576 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
    577                       (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
    578                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    579                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
    580                         Int8Regs:$c))]>;
    581 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
    582                       (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
    583                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    584                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
    585                         imm:$c))]>;
    586 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
    587                       (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
    588                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    589                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
    590                         Int8Regs:$c))]>;
    591 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
    592                       (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
    593                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    594                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
    595                         imm:$c))]>;
    596 
    597 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
    598                       (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
    599                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    600                       [(set Int16Regs:$dst, (add
    601                         (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
    602 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
    603                       (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
    604                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    605                       [(set Int16Regs:$dst, (add
    606                         (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
    607 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
    608                       (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
    609                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    610                       [(set Int16Regs:$dst, (add
    611                         (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
    612 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
    613     (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
    614                       "mad.lo.s16 \t$dst, $a, $b, $c;",
    615                       [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
    616                         imm:$c))]>;
    617 
    618 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
    619                       (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
    620                       "mad.lo.s32 \t$dst, $a, $b, $c;",
    621                       [(set Int32Regs:$dst, (add
    622                         (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
    623 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
    624                       (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
    625                       "mad.lo.s32 \t$dst, $a, $b, $c;",
    626                       [(set Int32Regs:$dst, (add
    627                         (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
    628 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
    629                       (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
    630                       "mad.lo.s32 \t$dst, $a, $b, $c;",
    631                       [(set Int32Regs:$dst, (add
    632                         (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
    633 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
    634                       (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
    635                       "mad.lo.s32 \t$dst, $a, $b, $c;",
    636                       [(set Int32Regs:$dst, (add
    637                         (mul Int32Regs:$a, imm:$b), imm:$c))]>;
    638 
    639 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
    640                       (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
    641                       "mad.lo.s64 \t$dst, $a, $b, $c;",
    642                       [(set Int64Regs:$dst, (add
    643                         (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
    644 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
    645                       (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
    646                       "mad.lo.s64 \t$dst, $a, $b, $c;",
    647                       [(set Int64Regs:$dst, (add
    648                         (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
    649 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
    650                       (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
    651                       "mad.lo.s64 \t$dst, $a, $b, $c;",
    652                       [(set Int64Regs:$dst, (add
    653                         (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
    654 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
    655                       (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
    656                       "mad.lo.s64 \t$dst, $a, $b, $c;",
    657                       [(set Int64Regs:$dst, (add
    658                         (mul Int64Regs:$a, imm:$b), imm:$c))]>;
    659 
    660 
    661 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
    662                      !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
    663                                  "neg.s16 \t$dst, $dst;"),
    664          [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
    665 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
    666                      "neg.s16 \t$dst, $src;",
    667          [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
    668 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
    669                      "neg.s32 \t$dst, $src;",
    670          [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
    671 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
    672                      "neg.s64 \t$dst, $src;",
    673          [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
    674 
    675 //-----------------------------------
    676 // Floating Point Arithmetic
    677 //-----------------------------------
    678 
    679 // Constant 1.0f
    680 def FloatConst1 : PatLeaf<(fpimm), [{
    681     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
    682       return false;
    683     float f = (float)N->getValueAPF().convertToFloat();
    684     return (f==1.0f);
    685 }]>;
    686 // Constand (double)1.0
    687 def DoubleConst1 : PatLeaf<(fpimm), [{
    688     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
    689       return false;
    690     double d = (double)N->getValueAPF().convertToDouble();
    691     return (d==1.0);
    692 }]>;
    693 
    694 defm FADD : F3<"add", fadd>;
    695 defm FSUB : F3<"sub", fsub>;
    696 defm FMUL : F3<"mul", fmul>;
    697 
    698 defm FADD_rn : F3_rn<"add", fadd>;
    699 defm FSUB_rn : F3_rn<"sub", fsub>;
    700 defm FMUL_rn : F3_rn<"mul", fmul>;
    701 
    702 defm FABS : F2<"abs", fabs>;
    703 defm FNEG : F2<"neg", fneg>;
    704 defm FSQRT : F2<"sqrt.rn", fsqrt>;
    705 
    706 //
    707 // F64 division
    708 //
    709 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
    710                       (ins f64imm:$a, Float64Regs:$b),
    711                       "rcp.rn.f64 \t$dst, $b;",
    712                       [(set Float64Regs:$dst,
    713                         (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
    714 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
    715                       (ins Float64Regs:$a, Float64Regs:$b),
    716                       "div.rn.f64 \t$dst, $a, $b;",
    717                       [(set Float64Regs:$dst,
    718                         (fdiv Float64Regs:$a, Float64Regs:$b))]>;
    719 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
    720                       (ins Float64Regs:$a, f64imm:$b),
    721                       "div.rn.f64 \t$dst, $a, $b;",
    722                       [(set Float64Regs:$dst,
    723                         (fdiv Float64Regs:$a, fpimm:$b))]>;
    724 
    725 //
    726 // F32 Approximate reciprocal
    727 //
    728 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
    729                       (ins f32imm:$a, Float32Regs:$b),
    730                       "rcp.approx.ftz.f32 \t$dst, $b;",
    731                       [(set Float32Regs:$dst,
    732                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    733                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
    734 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
    735                         (ins f32imm:$a, Float32Regs:$b),
    736                        "rcp.approx.f32 \t$dst, $b;",
    737                       [(set Float32Regs:$dst,
    738                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    739                       Requires<[do_DIVF32_APPROX]>;
    740 //
    741 // F32 Approximate division
    742 //
    743 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
    744                       (ins Float32Regs:$a, Float32Regs:$b),
    745                       "div.approx.ftz.f32 \t$dst, $a, $b;",
    746                       [(set Float32Regs:$dst,
    747                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    748                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
    749 def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
    750                       (ins Float32Regs:$a, Float32Regs:$b),
    751                       "div.approx.f32 \t$dst, $a, $b;",
    752                       [(set Float32Regs:$dst,
    753                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    754                       Requires<[do_DIVF32_APPROX]>;
    755 //
    756 // F32 Semi-accurate reciprocal
    757 //
    758 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
    759 //
    760 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
    761                       (ins f32imm:$a, Float32Regs:$b),
    762                       "rcp.approx.ftz.f32 \t$dst, $b;",
    763                       [(set Float32Regs:$dst,
    764                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    765                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
    766 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
    767                       (ins f32imm:$a, Float32Regs:$b),
    768                       "rcp.approx.f32 \t$dst, $b;",
    769                       [(set Float32Regs:$dst,
    770                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    771                       Requires<[do_DIVF32_FULL]>;
    772 //
    773 // F32 Semi-accurate division
    774 //
    775 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
    776                       (ins Float32Regs:$a, Float32Regs:$b),
    777                       "div.full.ftz.f32 \t$dst, $a, $b;",
    778                       [(set Float32Regs:$dst,
    779                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    780                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
    781 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
    782                       (ins Float32Regs:$a, f32imm:$b),
    783                       "div.full.ftz.f32 \t$dst, $a, $b;",
    784                       [(set Float32Regs:$dst,
    785                         (fdiv Float32Regs:$a, fpimm:$b))]>,
    786                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
    787 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
    788                       (ins Float32Regs:$a, Float32Regs:$b),
    789                       "div.full.f32 \t$dst, $a, $b;",
    790                       [(set Float32Regs:$dst,
    791                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    792                       Requires<[do_DIVF32_FULL]>;
    793 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
    794                       (ins Float32Regs:$a, f32imm:$b),
    795                       "div.full.f32 \t$dst, $a, $b;",
    796                       [(set Float32Regs:$dst,
    797                         (fdiv Float32Regs:$a, fpimm:$b))]>,
    798                       Requires<[do_DIVF32_FULL]>;
    799 //
    800 // F32 Accurate reciprocal
    801 //
    802 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
    803                         (ins f32imm:$a, Float32Regs:$b),
    804                        "rcp.rn.ftz.f32 \t$dst, $b;",
    805                       [(set Float32Regs:$dst,
    806                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    807                       Requires<[reqPTX20, doF32FTZ]>;
    808 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
    809                       (ins f32imm:$a, Float32Regs:$b),
    810                        "rcp.rn.f32 \t$dst, $b;",
    811                       [(set Float32Regs:$dst,
    812                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    813                       Requires<[reqPTX20]>;
    814 //
    815 // F32 Accurate division
    816 //
    817 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
    818                       (ins Float32Regs:$a, Float32Regs:$b),
    819                       "div.rn.ftz.f32 \t$dst, $a, $b;",
    820                       [(set Float32Regs:$dst,
    821                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    822                       Requires<[doF32FTZ, reqPTX20]>;
    823 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
    824                       (ins Float32Regs:$a, f32imm:$b),
    825                       "div.rn.ftz.f32 \t$dst, $a, $b;",
    826                       [(set Float32Regs:$dst,
    827                         (fdiv Float32Regs:$a, fpimm:$b))]>,
    828                       Requires<[doF32FTZ, reqPTX20]>;
    829 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
    830                       (ins Float32Regs:$a, Float32Regs:$b),
    831                       "div.rn.f32 \t$dst, $a, $b;",
    832                       [(set Float32Regs:$dst,
    833                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    834                       Requires<[reqPTX20]>;
    835 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
    836                       (ins Float32Regs:$a, f32imm:$b),
    837                       "div.rn.f32 \t$dst, $a, $b;",
    838                       [(set Float32Regs:$dst,
    839                         (fdiv Float32Regs:$a, fpimm:$b))]>,
    840                       Requires<[reqPTX20]>;
    841 
    842 
    843 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
    844    def rrr : NVPTXInst<(outs Float32Regs:$dst),
    845                       (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
    846                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    847                       [(set Float32Regs:$dst, (fadd
    848                         (fmul Float32Regs:$a, Float32Regs:$b),
    849                         Float32Regs:$c))]>, Requires<[Pred]>;
    850    // This is to WAR a weird bug in Tablegen that does not automatically
    851    // generate the following permutated rule rrr2 from the above rrr.
    852    // So we explicitly add it here. This happens to FMA32 only.
    853    // See the comments at FMAD32 and FMA32 for more information.
    854    def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
    855                         (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
    856                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    857                       [(set Float32Regs:$dst, (fadd Float32Regs:$c,
    858                         (fmul Float32Regs:$a, Float32Regs:$b)))]>,
    859                       Requires<[Pred]>;
    860    def rri : NVPTXInst<(outs Float32Regs:$dst),
    861                       (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
    862                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    863                       [(set Float32Regs:$dst, (fadd
    864                         (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
    865                       Requires<[Pred]>;
    866    def rir : NVPTXInst<(outs Float32Regs:$dst),
    867                       (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
    868                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    869                       [(set Float32Regs:$dst, (fadd
    870                         (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
    871                       Requires<[Pred]>;
    872    def rii : NVPTXInst<(outs Float32Regs:$dst),
    873                       (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
    874                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    875                       [(set Float32Regs:$dst, (fadd
    876                         (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
    877                       Requires<[Pred]>;
    878 }
    879 
    880 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
    881    def rrr : NVPTXInst<(outs Float64Regs:$dst),
    882                       (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
    883                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    884                       [(set Float64Regs:$dst, (fadd
    885                         (fmul Float64Regs:$a, Float64Regs:$b),
    886                         Float64Regs:$c))]>, Requires<[Pred]>;
    887    def rri : NVPTXInst<(outs Float64Regs:$dst),
    888                       (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
    889                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    890                       [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
    891                         Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
    892    def rir : NVPTXInst<(outs Float64Regs:$dst),
    893                       (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
    894                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    895                       [(set Float64Regs:$dst, (fadd
    896                         (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
    897                       Requires<[Pred]>;
    898    def rii : NVPTXInst<(outs Float64Regs:$dst),
    899                       (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
    900                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
    901                       [(set Float64Regs:$dst, (fadd
    902                         (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
    903                       Requires<[Pred]>;
    904 }
    905 
    906 // Due to a unknown reason (most likely a bug in tablegen), tablegen does not
    907 // automatically generate the rrr2 rule from
    908 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
    909 // If we reverse the order of the following two lines, then rrr2 rule will be
    910 // generated for FMA32, but not for rrr.
    911 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
    912 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
    913 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
    914 defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
    915 defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
    916 defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
    917 
    918 // b*c-a => fmad(b, c, -a)
    919 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
    920   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
    921           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
    922           Requires<[Pred]>;
    923 }
    924 
    925 // a-b*c => fmad(-b,c, a)
    926 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
    927 // b*c-a => fmad(b, c, -a)
    928 // - legal because b*c-a <=> b*c+(-a)
    929 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
    930   def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
    931           (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
    932           Requires<[Pred]>;
    933   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
    934           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
    935           Requires<[Pred]>;
    936 }
    937 
    938 // a-b*c => fmad(-b,c, a)
    939 // b*c-a => fmad(b, c, -a)
    940 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
    941   def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
    942           (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
    943           Requires<[Pred]>;
    944 
    945   def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
    946           (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
    947           Requires<[Pred]>;
    948 }
    949 
    950 defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
    951 defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
    952 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
    953 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
    954 defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
    955 
    956 def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
    957                       "sin.approx.f32 \t$dst, $src;",
    958                       [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
    959 def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
    960                       "cos.approx.f32 \t$dst, $src;",
    961                       [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
    962 
    963 //-----------------------------------
    964 // Logical Arithmetic
    965 //-----------------------------------
    966 
    967 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
    968   def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
    969                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
    970                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
    971   def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
    972                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
    973                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
    974   def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
    975                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
    976                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
    977   def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
    978                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
    979                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
    980   def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
    981                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
    982                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
    983                         Int16Regs:$b))]>;
    984   def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
    985                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
    986                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
    987   def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
    988                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
    989                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
    990                         Int32Regs:$b))]>;
    991   def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
    992                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
    993                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
    994   def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
    995                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
    996                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
    997                         Int64Regs:$b))]>;
    998   def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
    999                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
   1000                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
   1001 }
   1002 
   1003 defm OR  : LOG_FORMAT<"or", or>;
   1004 defm AND : LOG_FORMAT<"and", and>;
   1005 defm XOR : LOG_FORMAT<"xor", xor>;
   1006 
   1007 def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
   1008                       "not.pred \t$dst, $src;",
   1009                       [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
   1010 def NOT8:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
   1011                       "not.b16 \t$dst, $src;",
   1012                       [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
   1013 def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
   1014                       "not.b16 \t$dst, $src;",
   1015                       [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
   1016 def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
   1017                       "not.b32 \t$dst, $src;",
   1018                       [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
   1019 def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
   1020                       "not.b64 \t$dst, $src;",
   1021                       [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
   1022 
   1023 // For shifts, the second src operand must be 32-bit value
   1024 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
   1025    def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
   1026                       Int32Regs:$b),
   1027                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1028                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
   1029                         Int32Regs:$b))]>;
   1030    def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
   1031                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1032                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
   1033                         (i32 imm:$b)))]>;
   1034    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
   1035                       Int32Regs:$b),
   1036                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1037                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
   1038                         Int32Regs:$b))]>;
   1039    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
   1040                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1041                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
   1042                         (i32 imm:$b)))]>;
   1043    def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
   1044                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1045                       [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
   1046                         (i32 imm:$b)))]>;
   1047    def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
   1048                       Int32Regs:$b),
   1049                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1050                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
   1051                         Int32Regs:$b))]>;
   1052    def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
   1053                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1054                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
   1055                         (i32 imm:$b)))]>;
   1056    def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
   1057                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1058                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
   1059                         Int32Regs:$b))]>;
   1060    def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
   1061                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1062                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
   1063                         (i32 imm:$b)))]>;
   1064 }
   1065 
   1066 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
   1067 
   1068 // For shifts, the second src operand must be 32-bit value
   1069 // Need to add cvt for the 8-bits.
   1070 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
   1071    def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
   1072                       Int32Regs:$b),
   1073                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1074                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
   1075                         Int32Regs:$b))]>;
   1076    def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
   1077                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1078                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
   1079                         (i32 imm:$b)))]>;
   1080    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
   1081                       Int32Regs:$b),
   1082                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1083                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
   1084                         Int32Regs:$b))]>;
   1085    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
   1086                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1087                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
   1088                         (i32 imm:$b)))]>;
   1089    def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
   1090                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1091                       [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
   1092                         (i32 imm:$b)))]>;
   1093    def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
   1094                       Int32Regs:$b),
   1095                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1096                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
   1097                         Int32Regs:$b))]>;
   1098    def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
   1099                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1100                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
   1101                         (i32 imm:$b)))]>;
   1102    def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
   1103                       !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
   1104                       !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
   1105                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
   1106                         Int32Regs:$b))]>;
   1107    def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
   1108                       !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
   1109                       !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
   1110                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
   1111                         (i32 imm:$b)))]>;
   1112 }
   1113 
   1114 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
   1115 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
   1116 
   1117 // 32bit
   1118 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
   1119   (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
   1120     !strconcat("{{\n\t",
   1121     !strconcat(".reg .b32 %lhs;\n\t",
   1122     !strconcat(".reg .b32 %rhs;\n\t",
   1123     !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
   1124     !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
   1125     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
   1126     !strconcat("}}", ""))))))),
   1127     []>;
   1128 
   1129 def SUB_FRM_32 : SDNodeXForm<imm, [{
   1130     return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
   1131 }]>;
   1132 
   1133 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
   1134           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
   1135 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
   1136           (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
   1137 
   1138 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
   1139     Int32Regs:$amt),
   1140     !strconcat("{{\n\t",
   1141     !strconcat(".reg .b32 %lhs;\n\t",
   1142     !strconcat(".reg .b32 %rhs;\n\t",
   1143     !strconcat(".reg .b32 %amt2;\n\t",
   1144     !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
   1145     !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
   1146     !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
   1147     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
   1148     !strconcat("}}", ""))))))))),
   1149     [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
   1150 
   1151 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
   1152     Int32Regs:$amt),
   1153     !strconcat("{{\n\t",
   1154     !strconcat(".reg .b32 %lhs;\n\t",
   1155     !strconcat(".reg .b32 %rhs;\n\t",
   1156     !strconcat(".reg .b32 %amt2;\n\t",
   1157     !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
   1158     !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
   1159     !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
   1160     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
   1161     !strconcat("}}", ""))))))))),
   1162     [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
   1163 
   1164 // 64bit
   1165 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
   1166     i32imm:$amt1, i32imm:$amt2),
   1167     !strconcat("{{\n\t",
   1168     !strconcat(".reg .b64 %lhs;\n\t",
   1169     !strconcat(".reg .b64 %rhs;\n\t",
   1170     !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
   1171     !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
   1172     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
   1173     !strconcat("}}", ""))))))),
   1174     []>;
   1175 
   1176 def SUB_FRM_64 : SDNodeXForm<imm, [{
   1177     return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
   1178 }]>;
   1179 
   1180 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
   1181           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
   1182 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
   1183           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
   1184 
   1185 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
   1186     Int32Regs:$amt),
   1187     !strconcat("{{\n\t",
   1188     !strconcat(".reg .b64 %lhs;\n\t",
   1189     !strconcat(".reg .b64 %rhs;\n\t",
   1190     !strconcat(".reg .u32 %amt2;\n\t",
   1191     !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
   1192     !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
   1193     !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
   1194     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
   1195     !strconcat("}}", ""))))))))),
   1196     [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
   1197 
   1198 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
   1199     Int32Regs:$amt),
   1200     !strconcat("{{\n\t",
   1201     !strconcat(".reg .b64 %lhs;\n\t",
   1202     !strconcat(".reg .b64 %rhs;\n\t",
   1203     !strconcat(".reg .u32 %amt2;\n\t",
   1204     !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
   1205     !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
   1206     !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
   1207     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
   1208     !strconcat("}}", ""))))))))),
   1209     [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
   1210 
   1211 
   1212 //-----------------------------------
   1213 // Data Movement (Load / Store, Move)
   1214 //-----------------------------------
   1215 
   1216 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
   1217   [SDNPWantRoot]>;
   1218 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
   1219   [SDNPWantRoot]>;
   1220 
   1221 def MEMri : Operand<i32> {
   1222   let PrintMethod = "printMemOperand";
   1223   let MIOperandInfo = (ops Int32Regs, i32imm);
   1224 }
   1225 def MEMri64 : Operand<i64> {
   1226   let PrintMethod = "printMemOperand";
   1227   let MIOperandInfo = (ops Int64Regs, i64imm);
   1228 }
   1229 
   1230 def imem : Operand<iPTR> {
   1231     let PrintMethod = "printOperand";
   1232 }
   1233 
   1234 def imemAny : Operand<iPTRAny> {
   1235     let PrintMethod = "printOperand";
   1236 }
   1237 
   1238 def LdStCode : Operand<i32> {
   1239     let PrintMethod = "printLdStCode";
   1240 }
   1241 
   1242 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
   1243 def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
   1244 
   1245 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
   1246                      "mov.u32 \t$dst, $a;",
   1247                      [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
   1248 
   1249 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
   1250                      "mov.u64 \t$dst, $a;",
   1251                      [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
   1252 
   1253 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
   1254 let IsSimpleMove=1 in {
   1255 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
   1256                    "mov.pred \t$dst, $sss;", []>;
   1257 def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
   1258                     "mov.u16 \t$dst, $sss;", []>;
   1259 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
   1260                     "mov.u16 \t$dst, $sss;", []>;
   1261 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
   1262                     "mov.u32 \t$dst, $sss;", []>;
   1263 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
   1264                     "mov.u64 \t$dst, $sss;", []>;
   1265 
   1266 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
   1267                     "mov.f32 \t$dst, $src;", []>;
   1268 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
   1269                     "mov.f64 \t$dst, $src;", []>;
   1270 }
   1271 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
   1272                     "mov.pred \t$dst, $src;",
   1273           [(set Int1Regs:$dst, imm:$src)]>;
   1274 def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
   1275                     "mov.u16 \t$dst, $src;",
   1276           [(set Int8Regs:$dst, imm:$src)]>;
   1277 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
   1278                     "mov.u16 \t$dst, $src;",
   1279           [(set Int16Regs:$dst, imm:$src)]>;
   1280 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
   1281                     "mov.u32 \t$dst, $src;",
   1282           [(set Int32Regs:$dst, imm:$src)]>;
   1283 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
   1284                     "mov.u64 \t$dst, $src;",
   1285           [(set Int64Regs:$dst, imm:$src)]>;
   1286 
   1287 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
   1288                     "mov.f32 \t$dst, $src;",
   1289           [(set Float32Regs:$dst, fpimm:$src)]>;
   1290 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
   1291                     "mov.f64 \t$dst, $src;",
   1292           [(set Float64Regs:$dst, fpimm:$src)]>;
   1293 
   1294 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
   1295 
   1296 //---- Copy Frame Index ----
   1297 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
   1298                         "add.u32 \t$dst, ${addr:add};",
   1299                         [(set Int32Regs:$dst, ADDRri:$addr)]>;
   1300 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
   1301                         "add.u64 \t$dst, ${addr:add};",
   1302                         [(set Int64Regs:$dst, ADDRri64:$addr)]>;
   1303 
   1304 //-----------------------------------
   1305 // Comparison and Selection
   1306 //-----------------------------------
   1307 
   1308 // Generate string block like
   1309 // {
   1310 //   .reg .pred p;
   1311 //   setp.gt.s16 p, %a, %b;
   1312 //   selp.s16 %dst, -1, 0, p;
   1313 // }
   1314 // when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
   1315 class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
   1316   string b> {
   1317   string t1  = "{{\n\t.reg .pred p;\n\t";
   1318   string t2  = !strconcat(t1 , OpcStr);
   1319   string t3  = !strconcat(t2 , sz1);
   1320   string t4  = !strconcat(t3 , " \tp, ");
   1321   string t5  = !strconcat(t4 , a);
   1322   string t6  = !strconcat(t5 , ", ");
   1323   string t7  = !strconcat(t6 , b);
   1324   string t8  = !strconcat(t7 , ";\n\tselp.s");
   1325   string t9  = !strconcat(t8 , sz2);
   1326   string t10 = !strconcat(t9, " \t");
   1327   string t11 = !strconcat(t10, d);
   1328   string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
   1329 }
   1330 
   1331 // Generate string block like
   1332 // {
   1333 //   .reg .pred p;
   1334 //   .reg .s16 %temp1;
   1335 //   .reg .s16 %temp2;
   1336 //   cvt.s16.s8 %temp1, %a;
   1337 //   cvt s16.s8 %temp1, %b;
   1338 //   setp.gt.s16 p, %temp1, %temp2;
   1339 //   selp.s16 %dst, -1, 0, p;
   1340 // }
   1341 // when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
   1342 class Set_Stri8<string OpcStr, string d, string a, string b, string type,
   1343   string cvt> {
   1344   string t1  = "{{\n\t.reg .pred p;\n\t";
   1345   string t2  = !strconcat(t1, ".reg .");
   1346   string t3  = !strconcat(t2, type);
   1347   string t4  = !strconcat(t3, " %temp1;\n\t");
   1348   string t5  = !strconcat(t4, ".reg .");
   1349   string t6  = !strconcat(t5, type);
   1350   string t7  = !strconcat(t6, " %temp2;\n\t");
   1351   string t8  = !strconcat(t7, cvt);
   1352   string t9  = !strconcat(t8, " \t%temp1, ");
   1353   string t10 = !strconcat(t9, a);
   1354   string t11 = !strconcat(t10, ";\n\t");
   1355   string t12 = !strconcat(t11, cvt);
   1356   string t13 = !strconcat(t12, " \t%temp2, ");
   1357   string t14 = !strconcat(t13, b);
   1358   string t15 = !strconcat(t14, ";\n\t");
   1359   string t16 = !strconcat(t15, OpcStr);
   1360   string t17 = !strconcat(t16, "16");
   1361   string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
   1362   string t19 = !strconcat(t18, "selp.s16 \t");
   1363   string t20 = !strconcat(t19, d);
   1364   string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
   1365 }
   1366 
   1367 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
   1368   string TypeStr, string CVTStr> {
   1369   def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
   1370                      Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
   1371                []>;
   1372   def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
   1373       Int16Regs:$b),
   1374                      Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
   1375                []>;
   1376   def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
   1377       Int32Regs:$b),
   1378                      Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
   1379                []>;
   1380   def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
   1381       Int64Regs:$b),
   1382                      Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
   1383                []>;
   1384 
   1385   def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
   1386                      Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
   1387                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
   1388   def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
   1389                      Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
   1390                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
   1391   def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
   1392                      Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
   1393                [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
   1394   def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
   1395                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1396                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
   1397   def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
   1398                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1399                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
   1400   def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
   1401                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
   1402                [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
   1403   def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
   1404                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1405                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
   1406   def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
   1407                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1408                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
   1409   def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
   1410                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
   1411                [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
   1412   def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
   1413                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1414                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
   1415   def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
   1416                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1417                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
   1418   def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
   1419                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
   1420                [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
   1421 
   1422   def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
   1423                      Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
   1424                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
   1425   def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
   1426                      Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
   1427                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
   1428   def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
   1429                      Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
   1430                [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
   1431   def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
   1432       Int16Regs:$b),
   1433                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
   1434                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
   1435   def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
   1436                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
   1437                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
   1438   def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
   1439                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
   1440                [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
   1441   def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
   1442       Int32Regs:$b),
   1443                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
   1444                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
   1445   def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
   1446                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
   1447                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
   1448   def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
   1449                  !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
   1450                [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
   1451   def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
   1452       Int64Regs:$b),
   1453                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
   1454                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
   1455   def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
   1456                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
   1457                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
   1458   def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
   1459                  !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
   1460                [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
   1461 }
   1462 
   1463 multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
   1464   def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
   1465       Float32Regs:$b),
   1466                      Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
   1467                []>, Requires<[doF32FTZ]>;
   1468   def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
   1469       Float32Regs:$b),
   1470                      Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
   1471                []>;
   1472   def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
   1473       Float64Regs:$b),
   1474                      Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
   1475                []>;
   1476   def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
   1477       Float64Regs:$b),
   1478                      Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
   1479                []>;
   1480 
   1481   def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
   1482       , Float32Regs:$b),
   1483                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
   1484                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
   1485   , Requires<[doF32FTZ]>;
   1486   def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
   1487     (ins Float32Regs:$a, Float32Regs:$b),
   1488                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
   1489                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
   1490   def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
   1491     (ins Float32Regs:$a, f32imm:$b),
   1492                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
   1493                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
   1494   Requires<[doF32FTZ]>;
   1495   def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
   1496                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
   1497                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
   1498   def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
   1499     (ins f32imm:$a, Float32Regs:$b),
   1500                  !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
   1501                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
   1502   Requires<[doF32FTZ]>;
   1503   def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
   1504                  !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
   1505                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
   1506   def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
   1507     (ins Float64Regs:$a, Float64Regs:$b),
   1508                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
   1509                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
   1510   def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
   1511                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
   1512                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
   1513   def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
   1514                  !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
   1515                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
   1516 
   1517   def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
   1518     (ins Float32Regs:$a, Float32Regs:$b),
   1519                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
   1520                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
   1521   def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
   1522     (ins Float32Regs:$a, Float32Regs:$b),
   1523                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
   1524                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
   1525   def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
   1526     (ins Float32Regs:$a, f32imm:$b),
   1527                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
   1528                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
   1529   def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
   1530     (ins Float32Regs:$a, f32imm:$b),
   1531                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
   1532                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
   1533   def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
   1534     (ins f32imm:$a, Float32Regs:$b),
   1535                  !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
   1536                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
   1537   def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
   1538     (ins f32imm:$a, Float32Regs:$b),
   1539                  !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
   1540                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
   1541   def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
   1542     (ins Float64Regs:$a, Float64Regs:$b),
   1543                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
   1544                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
   1545   def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
   1546     (ins Float64Regs:$a, f64imm:$b),
   1547                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
   1548                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
   1549   def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
   1550     (ins f64imm:$a, Float64Regs:$b),
   1551                  !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
   1552                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
   1553 }
   1554 
   1555 defm ISetSGT
   1556 : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
   1557 defm ISetUGT
   1558 : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
   1559 defm ISetSLT
   1560 : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
   1561 defm ISetULT
   1562 : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
   1563 defm ISetSGE
   1564 : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
   1565 defm ISetUGE
   1566 : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
   1567 defm ISetSLE
   1568 : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
   1569 defm ISetULE
   1570 : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
   1571 defm ISetSEQ
   1572 : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
   1573 defm ISetUEQ
   1574 : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
   1575 defm ISetSNE
   1576 : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
   1577 defm ISetUNE
   1578 : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
   1579 
   1580 def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
   1581   (ins Int1Regs:$a, Int1Regs:$b),
   1582                       "xor.pred \t$dst, $a, $b;",
   1583             [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
   1584 def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
   1585   (ins Int1Regs:$a, Int1Regs:$b),
   1586                       "xor.pred \t$dst, $a, $b;",
   1587             [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
   1588 def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
   1589   (ins Int1Regs:$a, Int1Regs:$b),
   1590             !strconcat("{{\n\t",
   1591             !strconcat(".reg .pred temp;\n\t",
   1592             !strconcat("xor.pred \ttemp, $a, $b;\n\t",
   1593             !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
   1594             [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
   1595 def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
   1596   (ins Int1Regs:$a, Int1Regs:$b),
   1597             !strconcat("{{\n\t",
   1598             !strconcat(".reg .pred temp;\n\t",
   1599             !strconcat("xor.pred \ttemp, $a, $b;\n\t",
   1600             !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
   1601             [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
   1602 
   1603 // Compare 2 i1's and produce a u32
   1604 def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
   1605   (ins Int1Regs:$a, Int1Regs:$b),
   1606                   !strconcat("{{\n\t",
   1607                   !strconcat(".reg .pred temp;\n\t",
   1608                   !strconcat("xor.pred \ttemp, $a, $b;\n\t",
   1609                   !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
   1610                   [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
   1611 def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
   1612   (ins Int1Regs:$a, Int1Regs:$b),
   1613                   !strconcat("{{\n\t",
   1614                   !strconcat(".reg .pred temp;\n\t",
   1615                   !strconcat("xor.pred \ttemp, $a, $b;\n\t",
   1616                   !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
   1617                   [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
   1618 
   1619 defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
   1620 defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
   1621 defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
   1622 defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
   1623 defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
   1624 defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
   1625 
   1626 defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
   1627 defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
   1628 defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
   1629 defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
   1630 defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
   1631 defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
   1632 
   1633 defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
   1634 defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
   1635 
   1636 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
   1637                      (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
   1638                              (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
   1639 def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
   1640   (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
   1641                       "selp.b16 \t$dst, $a, $b, $p;",
   1642       [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
   1643 def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
   1644   (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
   1645                       "selp.b16 \t$dst, $a, $b, $p;",
   1646       [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
   1647 def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
   1648   (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
   1649                       "selp.b16 \t$dst, $a, $b, $p;",
   1650       [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
   1651 def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
   1652   (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
   1653                       "selp.b16 \t$dst, $a, $b, $p;",
   1654       [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
   1655 
   1656 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
   1657   (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
   1658                       "selp.b16 \t$dst, $a, $b, $p;",
   1659       [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
   1660 def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
   1661   (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
   1662                       "selp.b16 \t$dst, $a, $b, $p;",
   1663       [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
   1664 def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
   1665   (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
   1666                       "selp.b16 \t$dst, $a, $b, $p;",
   1667       [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
   1668 def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
   1669   (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
   1670                       "selp.b16 \t$dst, $a, $b, $p;",
   1671       [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
   1672 
   1673 def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
   1674   (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
   1675                       "selp.b32 \t$dst, $a, $b, $p;",
   1676       [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
   1677 def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
   1678   (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
   1679                       "selp.b32 \t$dst, $a, $b, $p;",
   1680       [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
   1681 def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
   1682   (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
   1683                       "selp.b32 \t$dst, $a, $b, $p;",
   1684       [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
   1685 def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
   1686   (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
   1687                       "selp.b32 \t$dst, $a, $b, $p;",
   1688       [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
   1689 
   1690 def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
   1691   (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
   1692                       "selp.b64 \t$dst, $a, $b, $p;",
   1693       [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
   1694 def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
   1695   (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
   1696                       "selp.b64 \t$dst, $a, $b, $p;",
   1697       [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
   1698 def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
   1699   (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
   1700                       "selp.b64 \t$dst, $a, $b, $p;",
   1701       [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
   1702 def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
   1703   (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
   1704                       "selp.b64 \t$dst, $a, $b, $p;",
   1705       [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
   1706 
   1707 def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
   1708   (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
   1709                       "selp.f32 \t$dst, $a, $b, $p;",
   1710       [(set Float32Regs:$dst,
   1711         (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
   1712 def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
   1713   (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
   1714                       "selp.f32 \t$dst, $a, $b, $p;",
   1715       [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
   1716 def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
   1717   (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
   1718                       "selp.f32 \t$dst, $a, $b, $p;",
   1719       [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
   1720 def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
   1721   (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
   1722                       "selp.f32 \t$dst, $a, $b, $p;",
   1723       [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
   1724 
   1725 def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
   1726   (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
   1727                       "selp.f64 \t$dst, $a, $b, $p;",
   1728       [(set Float64Regs:$dst,
   1729         (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
   1730 def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
   1731   (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
   1732                       "selp.f64 \t$dst, $a, $b, $p;",
   1733       [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
   1734 def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
   1735   (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
   1736                       "selp.f64 \t$dst, $a, $b, $p;",
   1737       [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
   1738 def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
   1739   (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
   1740                       "selp.f64 \t $dst, $a, $b, $p;",
   1741       [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
   1742 
   1743 //def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
   1744 //                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
   1745 
   1746 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
   1747   SDTCisInt<2>]>;
   1748 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
   1749   SDTCisInt<1>, SDTCisInt<2>]>;
   1750 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
   1751 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
   1752 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
   1753 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
   1754 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
   1755 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
   1756 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
   1757 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
   1758 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
   1759 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
   1760 def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
   1761 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
   1762 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
   1763 
   1764 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
   1765                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1766 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
   1767   SDTDeclareScalarParamProfile,
   1768                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1769 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
   1770   SDTDeclareParamProfile,
   1771                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1772 def DeclareRet   : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
   1773                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1774 def LoadParam    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
   1775                          [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
   1776 def PrintCall    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
   1777                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1778 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
   1779                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1780 def StoreParam   : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
   1781                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1782 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
   1783                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1784 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
   1785                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1786 def MoveToParam  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
   1787                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1788 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
   1789                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1790 def CallArg      : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
   1791                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1792 def LastCallArg  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
   1793                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1794 def CallArgEnd   : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
   1795                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1796 def CallVoid     : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
   1797                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1798 def Prototype    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
   1799                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1800 def CallVal      : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
   1801                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1802 def MoveParam    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
   1803                          []>;
   1804 def MoveRetval   : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
   1805                          [SDNPHasChain, SDNPSideEffect]>;
   1806 def StoreRetval  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
   1807                          [SDNPHasChain, SDNPSideEffect]>;
   1808 def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
   1809                          [SDNPHasChain, SDNPSideEffect]>;
   1810 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
   1811   SDTPseudoUseParamProfile,
   1812                        [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
   1813 def RETURNNode   : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
   1814                          [SDNPHasChain, SDNPSideEffect]>;
   1815 
   1816 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
   1817       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
   1818                 !strconcat(!strconcat("ld.param", opstr),
   1819                 "\t$dst, [retval0+$b];"),
   1820                 [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
   1821 
   1822 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
   1823       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
   1824                 !strconcat(!strconcat("mov", opstr),
   1825                 "\t$dst, retval$b;"),
   1826                 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
   1827 
   1828 class StoreParamInst<NVPTXRegClass regclass, string opstr> :
   1829       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
   1830                 !strconcat(!strconcat("st.param", opstr),
   1831                 "\t[param$a+$b], $val;"),
   1832                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
   1833 
   1834 class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
   1835       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
   1836                 !strconcat(!strconcat("mov", opstr),
   1837                 "\tparam$a, $val;"),
   1838                 [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
   1839 
   1840 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
   1841       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
   1842                 !strconcat(!strconcat("st.param", opstr),
   1843                 "\t[func_retval0+$a], $val;"),
   1844                 [(StoreRetval (i32 imm:$a), regclass:$val)]>;
   1845 
   1846 class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
   1847       NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
   1848                 !strconcat(!strconcat("mov", opstr),
   1849                 "\tfunc_retval$num, $val;"),
   1850                 [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
   1851 
   1852 class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
   1853       NVPTXInst<(outs), (ins regclass:$val),
   1854                 !strconcat(!strconcat("mov", opstr),
   1855                 "\tfunc_retval0, $val;"),
   1856                 [(MoveRetval regclass:$val)]>;
   1857 
   1858 def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
   1859 "call (retval0), ",
   1860                                 [(PrintCall (i32 1))]>;
   1861 def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
   1862 "call (retval0, retval1), ",
   1863                                 [(PrintCall (i32 2))]>;
   1864 def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
   1865 "call (retval0, retval1, retval2), ",
   1866                                 [(PrintCall (i32 3))]>;
   1867 def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
   1868 "call (retval0, retval1, retval2, retval3), ",
   1869                                 [(PrintCall (i32 4))]>;
   1870 def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
   1871 "call (retval0, retval1, retval2, retval3, retval4), ",
   1872                                 [(PrintCall (i32 5))]>;
   1873 def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
   1874 "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
   1875                                 [(PrintCall (i32 6))]>;
   1876 def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
   1877 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
   1878                                 [(PrintCall (i32 7))]>;
   1879 def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
   1880 !strconcat("call (retval0, retval1, retval2, retval3, retval4",
   1881            ", retval5, retval6, retval7), "),
   1882                                 [(PrintCall (i32 8))]>;
   1883 
   1884 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
   1885                                 [(PrintCall (i32 0))]>;
   1886 
   1887 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
   1888 "call.uni (retval0), ",
   1889                                 [(PrintCallUni (i32 1))]>;
   1890 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
   1891 "call.uni (retval0, retval1), ",
   1892                                 [(PrintCallUni (i32 2))]>;
   1893 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
   1894 "call.uni (retval0, retval1, retval2), ",
   1895                                 [(PrintCallUni (i32 3))]>;
   1896 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
   1897 "call.uni (retval0, retval1, retval2, retval3), ",
   1898                                 [(PrintCallUni (i32 4))]>;
   1899 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
   1900 "call.uni (retval0, retval1, retval2, retval3, retval4), ",
   1901                                 [(PrintCallUni (i32 5))]>;
   1902 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
   1903 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
   1904                                 [(PrintCallUni (i32 6))]>;
   1905 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
   1906 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
   1907                                 [(PrintCallUni (i32 7))]>;
   1908 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
   1909 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
   1910            ", retval5, retval6, retval7), "),
   1911                                 [(PrintCallUni (i32 8))]>;
   1912 
   1913 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
   1914                                 [(PrintCallUni (i32 0))]>;
   1915 
   1916 def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
   1917 def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
   1918 def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
   1919 def LoadParamMemI8     : LoadParamMemInst<Int8Regs, ".b8">;
   1920 
   1921 //def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
   1922 //                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
   1923 //                "cvt.u16.u32\t$dst, temp_param_reg;"),
   1924 //                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
   1925 //def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
   1926 //                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
   1927 //                "cvt.u16.u32\t$dst, temp_param_reg;"),
   1928 //                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
   1929 
   1930 def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
   1931 def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
   1932 
   1933 def LoadParamRegI64    : LoadParamRegInst<Int64Regs, ".b64">;
   1934 def LoadParamRegI32    : LoadParamRegInst<Int32Regs, ".b32">;
   1935 def LoadParamRegI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
   1936                          "cvt.u16.u32\t$dst, retval$b;",
   1937                          [(set Int16Regs:$dst,
   1938                            (LoadParam (i32 0), (i32 imm:$b)))]>;
   1939 def LoadParamRegI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
   1940                          "cvt.u16.u32\t$dst, retval$b;",
   1941                          [(set Int8Regs:$dst,
   1942                            (LoadParam (i32 0), (i32 imm:$b)))]>;
   1943 
   1944 def LoadParamRegF32    : LoadParamRegInst<Float32Regs, ".f32">;
   1945 def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
   1946 
   1947 def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
   1948 def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
   1949 
   1950 def StoreParamI16    : NVPTXInst<(outs),
   1951   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
   1952                        "st.param.b16\t[param$a+$b], $val;",
   1953            [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
   1954 
   1955 def StoreParamI8     : NVPTXInst<(outs),
   1956   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
   1957                        "st.param.b8\t[param$a+$b], $val;",
   1958                        [(StoreParam
   1959                          (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
   1960 
   1961 def StoreParamS32I16 : NVPTXInst<(outs),
   1962   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
   1963                  !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
   1964                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
   1965                  [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
   1966 def StoreParamU32I16 : NVPTXInst<(outs),
   1967   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
   1968                  !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
   1969                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
   1970                  [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
   1971 
   1972 def StoreParamU32I8   : NVPTXInst<(outs),
   1973   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
   1974                  !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
   1975                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
   1976                  [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
   1977 def StoreParamS32I8   : NVPTXInst<(outs),
   1978   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
   1979                  !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
   1980                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
   1981                  [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
   1982 
   1983 def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
   1984 def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
   1985 
   1986 def MoveToParamI64   : MoveToParamInst<Int64Regs, ".b64">;
   1987 def MoveToParamI32   : MoveToParamInst<Int32Regs, ".b32">;
   1988 def MoveToParamF64   : MoveToParamInst<Float64Regs, ".f64">;
   1989 def MoveToParamF32   : MoveToParamInst<Float32Regs, ".f32">;
   1990 def MoveToParamI16   : NVPTXInst<(outs),
   1991   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
   1992                    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
   1993                               "mov.b32\tparam$a, temp_param_reg;"),
   1994                    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
   1995 def MoveToParamI8    : NVPTXInst<(outs),
   1996   (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
   1997                    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
   1998                               "mov.b32\tparam$a, temp_param_reg;"),
   1999                    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
   2000 
   2001 def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
   2002 def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
   2003 def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
   2004 def StoreRetvalI8     : StoreRetvalInst<Int8Regs, ".b8">;
   2005 
   2006 //def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
   2007 //     !strconcat("\{\n\t",
   2008 //     !strconcat(".reg .b32 temp_retval_reg;\n\t",
   2009 //     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
   2010 //                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
   2011 //     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
   2012 //def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
   2013 //     !strconcat("\{\n\t",
   2014 //     !strconcat(".reg .b32 temp_retval_reg;\n\t",
   2015 //     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
   2016 //                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
   2017 //     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
   2018 
   2019 def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
   2020 def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
   2021 
   2022 def MoveRetvalI64    : MoveRetvalInst<Int64Regs, ".b64">;
   2023 def MoveRetvalI32    : MoveRetvalInst<Int32Regs, ".b32">;
   2024 def MoveRetvalI16    : MoveRetvalInst<Int16Regs, ".b16">;
   2025 def MoveRetvalI8     : MoveRetvalInst<Int8Regs, ".b8">;
   2026 def MoveRetvalF64    : MoveRetvalInst<Float64Regs, ".f64">;
   2027 def MoveRetvalF32    : MoveRetvalInst<Float32Regs, ".f32">;
   2028 
   2029 def MoveToRetvalI64    : MoveToRetvalInst<Int64Regs, ".b64">;
   2030 def MoveToRetvalI32    : MoveToRetvalInst<Int32Regs, ".b32">;
   2031 def MoveToRetvalF64    : MoveToRetvalInst<Float64Regs, ".f64">;
   2032 def MoveToRetvalF32    : MoveToRetvalInst<Float32Regs, ".f32">;
   2033 def MoveToRetvalI16    : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
   2034                          "cvt.u32.u16\tfunc_retval$num, $val;",
   2035                          [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
   2036 def MoveToRetvalI8     : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
   2037                          "cvt.u32.u16\tfunc_retval$num, $val;",
   2038                          [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
   2039 
   2040 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
   2041 def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
   2042 def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
   2043 def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
   2044 
   2045 class CallArgInst<NVPTXRegClass regclass> :
   2046       NVPTXInst<(outs), (ins regclass:$a), "$a, ",
   2047                 [(CallArg (i32 0), regclass:$a)]>;
   2048 
   2049 class LastCallArgInst<NVPTXRegClass regclass> :
   2050       NVPTXInst<(outs), (ins regclass:$a), "$a",
   2051                 [(LastCallArg (i32 0), regclass:$a)]>;
   2052 
   2053 def CallArgI64     : CallArgInst<Int64Regs>;
   2054 def CallArgI32     : CallArgInst<Int32Regs>;
   2055 def CallArgI16     : CallArgInst<Int16Regs>;
   2056 def CallArgI8      : CallArgInst<Int8Regs>;
   2057 
   2058 def CallArgF64     : CallArgInst<Float64Regs>;
   2059 def CallArgF32     : CallArgInst<Float32Regs>;
   2060 
   2061 def LastCallArgI64 : LastCallArgInst<Int64Regs>;
   2062 def LastCallArgI32 : LastCallArgInst<Int32Regs>;
   2063 def LastCallArgI16 : LastCallArgInst<Int16Regs>;
   2064 def LastCallArgI8  : LastCallArgInst<Int8Regs>;
   2065 
   2066 def LastCallArgF64 : LastCallArgInst<Float64Regs>;
   2067 def LastCallArgF32 : LastCallArgInst<Float32Regs>;
   2068 
   2069 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
   2070                               [(CallArg (i32 0), (i32 imm:$a))]>;
   2071 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
   2072                               [(LastCallArg (i32 0), (i32 imm:$a))]>;
   2073 
   2074 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
   2075                              [(CallArg (i32 1), (i32 imm:$a))]>;
   2076 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
   2077                              [(LastCallArg (i32 1), (i32 imm:$a))]>;
   2078 
   2079 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
   2080                              "$addr, ",
   2081                              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
   2082 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
   2083                              "$addr, ",
   2084                              [(CallVoid Int32Regs:$addr)]>;
   2085 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
   2086                              "$addr, ",
   2087                              [(CallVoid Int64Regs:$addr)]>;
   2088 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
   2089                              ", prototype_$val;",
   2090                              [(Prototype (i32 imm:$val))]>;
   2091 
   2092 def DeclareRetMemInst : NVPTXInst<(outs),
   2093   (ins i32imm:$align, i32imm:$size, i32imm:$num),
   2094          ".param .align $align .b8 retval$num[$size];",
   2095          [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
   2096 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
   2097          ".param .b$size retval$num;",
   2098          [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
   2099 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
   2100          ".reg .b$size retval$num;",
   2101          [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
   2102 
   2103 def DeclareParamInst : NVPTXInst<(outs),
   2104   (ins i32imm:$align, i32imm:$a, i32imm:$size),
   2105          ".param .align $align .b8 param$a[$size];",
   2106          [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
   2107 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
   2108          ".param .b$size param$a;",
   2109          [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
   2110 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
   2111          ".reg .b$size param$a;",
   2112          [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
   2113 
   2114 class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
   2115       NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
   2116                 !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
   2117                 [(set regclass:$dst, (MoveParam regclass:$src))]>;
   2118 
   2119 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
   2120 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
   2121 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
   2122                    "cvt.u16.u32\t$dst, $src;",
   2123                    [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
   2124 def MoveParamI8  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
   2125                    "cvt.u16.u32\t$dst, $src;",
   2126                    [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
   2127 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
   2128 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
   2129 
   2130 class PseudoUseParamInst<NVPTXRegClass regclass> :
   2131       NVPTXInst<(outs), (ins regclass:$src),
   2132       "// Pseudo use of $src",
   2133       [(PseudoUseParam regclass:$src)]>;
   2134 
   2135 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
   2136 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
   2137 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
   2138 def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
   2139 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
   2140 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
   2141 
   2142 
   2143 //
   2144 // Load / Store Handling
   2145 //
   2146 multiclass LD<NVPTXRegClass regclass> {
   2147   def _avar : NVPTXInst<(outs regclass:$dst),
   2148     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2149       i32imm:$fromWidth, imem:$addr),
   2150 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2151            "$fromWidth \t$dst, [$addr];"), []>;
   2152   def _areg : NVPTXInst<(outs regclass:$dst),
   2153     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2154       i32imm:$fromWidth, Int32Regs:$addr),
   2155 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2156            "$fromWidth \t$dst, [$addr];"), []>;
   2157   def _areg_64 : NVPTXInst<(outs regclass:$dst),
   2158     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2159      i32imm:$fromWidth, Int64Regs:$addr),
   2160      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
   2161                 " \t$dst, [$addr];"), []>;
   2162   def _ari : NVPTXInst<(outs regclass:$dst),
   2163     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2164       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
   2165 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2166            "$fromWidth \t$dst, [$addr+$offset];"), []>;
   2167   def _ari_64 : NVPTXInst<(outs regclass:$dst),
   2168     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2169      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
   2170     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
   2171                " \t$dst, [$addr+$offset];"), []>;
   2172   def _asi : NVPTXInst<(outs regclass:$dst),
   2173     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2174       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
   2175 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2176            "$fromWidth \t$dst, [$addr+$offset];"), []>;
   2177 }
   2178 
   2179 let mayLoad=1, neverHasSideEffects=1 in {
   2180 defm LD_i8  : LD<Int8Regs>;
   2181 defm LD_i16 : LD<Int16Regs>;
   2182 defm LD_i32 : LD<Int32Regs>;
   2183 defm LD_i64 : LD<Int64Regs>;
   2184 defm LD_f32 : LD<Float32Regs>;
   2185 defm LD_f64 : LD<Float64Regs>;
   2186 }
   2187 
   2188 multiclass ST<NVPTXRegClass regclass> {
   2189   def _avar : NVPTXInst<(outs),
   2190     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2191       LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
   2192 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
   2193            " \t[$addr], $src;"), []>;
   2194   def _areg : NVPTXInst<(outs),
   2195     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2196       LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
   2197 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
   2198            " \t[$addr], $src;"), []>;
   2199   def _areg_64 : NVPTXInst<(outs),
   2200     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2201      LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
   2202   !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
   2203                "\t[$addr], $src;"), []>;
   2204   def _ari : NVPTXInst<(outs),
   2205     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2206       LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
   2207 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
   2208            " \t[$addr+$offset], $src;"), []>;
   2209   def _ari_64 : NVPTXInst<(outs),
   2210     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2211      LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
   2212   !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
   2213                "\t[$addr+$offset], $src;"), []>;
   2214   def _asi : NVPTXInst<(outs),
   2215     (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
   2216       LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
   2217 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
   2218            " \t[$addr+$offset], $src;"), []>;
   2219 }
   2220 
   2221 let mayStore=1, neverHasSideEffects=1 in {
   2222 defm ST_i8  : ST<Int8Regs>;
   2223 defm ST_i16 : ST<Int16Regs>;
   2224 defm ST_i32 : ST<Int32Regs>;
   2225 defm ST_i64 : ST<Int64Regs>;
   2226 defm ST_f32 : ST<Float32Regs>;
   2227 defm ST_f64 : ST<Float64Regs>;
   2228 }
   2229 
   2230 // The following is used only in and after vector elementizations.
   2231 // Vector elementization happens at the machine instruction level, so the
   2232 // following instruction
   2233 // never appears in the DAG.
   2234 multiclass LD_VEC<NVPTXRegClass regclass> {
   2235   def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2236     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2237       i32imm:$fromWidth, imem:$addr),
   2238     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2239                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
   2240   def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2241     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2242       i32imm:$fromWidth, Int32Regs:$addr),
   2243     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2244                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
   2245   def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2246     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2247      i32imm:$fromWidth, Int64Regs:$addr),
   2248     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2249                "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
   2250   def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2251     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2252       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
   2253     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2254                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
   2255   def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2256     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2257      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
   2258     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2259                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
   2260   def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   2261     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2262       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
   2263     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2264                "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
   2265   def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   2266       regclass:$dst3, regclass:$dst4),
   2267     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2268       i32imm:$fromWidth, imem:$addr),
   2269     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2270                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
   2271   def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   2272       regclass:$dst4),
   2273     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2274       i32imm:$fromWidth, Int32Regs:$addr),
   2275     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2276                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
   2277   def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   2278                                regclass:$dst3, regclass:$dst4),
   2279     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2280      i32imm:$fromWidth, Int64Regs:$addr),
   2281     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2282                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
   2283   def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   2284       regclass:$dst4),
   2285     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2286       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
   2287     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2288                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
   2289                 []>;
   2290   def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   2291                               regclass:$dst3, regclass:$dst4),
   2292     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2293      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
   2294     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2295                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
   2296     []>;
   2297   def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   2298       regclass:$dst4),
   2299     (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2300       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
   2301     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2302                "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
   2303                 []>;
   2304 }
   2305 let mayLoad=1, neverHasSideEffects=1 in {
   2306 defm LDV_i8  : LD_VEC<Int8Regs>;
   2307 defm LDV_i16 : LD_VEC<Int16Regs>;
   2308 defm LDV_i32 : LD_VEC<Int32Regs>;
   2309 defm LDV_i64 : LD_VEC<Int64Regs>;
   2310 defm LDV_f32 : LD_VEC<Float32Regs>;
   2311 defm LDV_f64 : LD_VEC<Float64Regs>;
   2312 }
   2313 
   2314 multiclass ST_VEC<NVPTXRegClass regclass> {
   2315   def _v2_avar : NVPTXInst<(outs),
   2316     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2317       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
   2318     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2319                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
   2320   def _v2_areg : NVPTXInst<(outs),
   2321     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2322       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
   2323     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2324                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
   2325   def _v2_areg_64 : NVPTXInst<(outs),
   2326     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2327      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
   2328     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2329                "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
   2330   def _v2_ari : NVPTXInst<(outs),
   2331     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2332       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
   2333       i32imm:$offset),
   2334     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2335                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
   2336   def _v2_ari_64 : NVPTXInst<(outs),
   2337     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2338      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
   2339      i32imm:$offset),
   2340     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2341                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
   2342   def _v2_asi : NVPTXInst<(outs),
   2343     (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
   2344       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
   2345       i32imm:$offset),
   2346     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2347                "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
   2348   def _v4_avar : NVPTXInst<(outs),
   2349     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2350       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2351       i32imm:$fromWidth, imem:$addr),
   2352     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2353                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
   2354   def _v4_areg : NVPTXInst<(outs),
   2355     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2356       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2357       i32imm:$fromWidth, Int32Regs:$addr),
   2358     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2359                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
   2360   def _v4_areg_64 : NVPTXInst<(outs),
   2361     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2362      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2363      i32imm:$fromWidth, Int64Regs:$addr),
   2364     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2365                "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
   2366   def _v4_ari : NVPTXInst<(outs),
   2367     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2368       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2369       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
   2370     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2371                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
   2372     []>;
   2373   def _v4_ari_64 : NVPTXInst<(outs),
   2374     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2375      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2376      i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
   2377     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2378                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
   2379      []>;
   2380   def _v4_asi : NVPTXInst<(outs),
   2381     (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
   2382       LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
   2383       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
   2384     !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
   2385                "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
   2386     []>;
   2387 }
   2388 let mayStore=1, neverHasSideEffects=1 in {
   2389 defm STV_i8  : ST_VEC<Int8Regs>;
   2390 defm STV_i16 : ST_VEC<Int16Regs>;
   2391 defm STV_i32 : ST_VEC<Int32Regs>;
   2392 defm STV_i64 : ST_VEC<Int64Regs>;
   2393 defm STV_f32 : ST_VEC<Float32Regs>;
   2394 defm STV_f64 : ST_VEC<Float64Regs>;
   2395 }
   2396 
   2397 
   2398 //---- Conversion ----
   2399 
   2400 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
   2401 // FIXME: need to add f16 support
   2402 //  def CVTf16i8 :
   2403 //    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
   2404 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
   2405 //        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
   2406 //  def CVTf16i16 :
   2407 //    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
   2408 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
   2409 //        [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
   2410 //  def CVTf16i32 :
   2411 //    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
   2412 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
   2413 //        [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
   2414 //  def CVTf16i64:
   2415 //    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
   2416 //          !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
   2417 //            [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
   2418 
   2419   def CVTf32i1 :
   2420     NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
   2421               "selp.f32 \t$d, 1.0, 0.0, $a;",
   2422         [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
   2423   def CVTf32i8 :
   2424     NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
   2425               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
   2426         [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
   2427   def CVTf32i16 :
   2428     NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
   2429               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
   2430         [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
   2431   def CVTf32i32 :
   2432     NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
   2433               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
   2434         [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
   2435   def CVTf32i64:
   2436     NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
   2437           !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
   2438             [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
   2439 
   2440   def CVTf64i1 :
   2441     NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
   2442               "selp.f64 \t$d, 1.0, 0.0, $a;",
   2443         [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
   2444   def CVTf64i8 :
   2445     NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
   2446               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
   2447         [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
   2448   def CVTf64i16 :
   2449     NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
   2450               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
   2451         [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
   2452   def CVTf64i32 :
   2453     NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
   2454               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
   2455         [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
   2456   def CVTf64i64:
   2457     NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
   2458           !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
   2459             [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
   2460 }
   2461 
   2462 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
   2463 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
   2464 
   2465 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
   2466 // FIXME: need to add f16 support
   2467 //  def CVTi8f16:
   2468 //    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
   2469 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
   2470 //        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
   2471   def CVTi8f32_ftz:
   2472     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
   2473               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
   2474         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
   2475   def CVTi8f32:
   2476     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
   2477               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
   2478         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
   2479   def CVTi8f64:
   2480     NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
   2481               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
   2482         [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
   2483 
   2484 // FIXME: need to add f16 support
   2485 //  def CVTi16f16:
   2486 //    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
   2487 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
   2488 //        [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
   2489   def CVTi16f32_ftz:
   2490     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
   2491               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
   2492         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
   2493   def CVTi16f32:
   2494     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
   2495               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
   2496         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
   2497   def CVTi16f64:
   2498     NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
   2499               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
   2500         [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
   2501 
   2502 // FIXME: need to add f16 support
   2503 //  def CVTi32f16:  def CVTi32f16:
   2504 //    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
   2505 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
   2506 //        [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
   2507   def CVTi32f32_ftz:
   2508     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
   2509               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
   2510         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
   2511   def CVTi32f32:
   2512     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
   2513               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
   2514         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
   2515   def CVTi32f64:
   2516     NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
   2517               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
   2518         [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
   2519 
   2520 // FIXME: need to add f16 support
   2521 //  def CVTi64f16:
   2522 //    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
   2523 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
   2524 //        [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
   2525   def CVTi64f32_ftz:
   2526     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
   2527               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
   2528         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
   2529   def CVTi64f32:
   2530     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
   2531               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
   2532         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
   2533   def CVTi64f64:
   2534     NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
   2535               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
   2536         [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
   2537 }
   2538 
   2539 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
   2540 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
   2541 
   2542 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
   2543   def ext1to8:
   2544        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
   2545            "selp.u16 \t$d, 1, 0, $a;",
   2546      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
   2547   def ext1to16:
   2548        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
   2549            "selp.u16 \t$d, 1, 0, $a;",
   2550      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
   2551   def ext1to32:
   2552        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
   2553            "selp.u32 \t$d, 1, 0, $a;",
   2554      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
   2555   def ext1to64:
   2556        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
   2557            "selp.u64 \t$d, 1, 0, $a;",
   2558      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
   2559 }
   2560 
   2561 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
   2562   def ext1to8:
   2563        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
   2564            "selp.s16 \t$d, -1, 0, $a;",
   2565      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
   2566   def ext1to16:
   2567        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
   2568            "selp.s16 \t$d, -1, 0, $a;",
   2569      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
   2570   def ext1to32:
   2571        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
   2572            "selp.s32 \t$d, -1, 0, $a;",
   2573      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
   2574   def ext1to64:
   2575        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
   2576            "selp.s64 \t$d, -1, 0, $a;",
   2577      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
   2578 }
   2579 
   2580 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
   2581   // All Int8Regs are emiited as 16bit registers in ptx.
   2582   // And there is no selp.u8 in ptx.
   2583   def ext8to16:
   2584        NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
   2585            !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
   2586              !strconcat(OpStr, "8 \t$d, $a;")))),
   2587      [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
   2588   def ext8to32:
   2589        NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
   2590            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
   2591              !strconcat(OpStr, "8 \t$d, $a;")))),
   2592      [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
   2593   def ext8to64:
   2594        NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
   2595            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
   2596              !strconcat(OpStr, "8 \t$d, $a;")))),
   2597      [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
   2598   def ext16to32:
   2599        NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
   2600            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
   2601              !strconcat(OpStr, "16 \t$d, $a;")))),
   2602      [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
   2603   def ext16to64:
   2604        NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
   2605            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
   2606              !strconcat(OpStr, "16 \t$d, $a;")))),
   2607      [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
   2608   def ext32to64:
   2609        NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
   2610            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
   2611              !strconcat(OpStr, "32 \t$d, $a;")))),
   2612      [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
   2613 }
   2614 
   2615 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
   2616 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
   2617 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
   2618 
   2619 defm Sint_extend : INT_EXTEND <"s", sext>;
   2620 defm Zint_extend : INT_EXTEND <"u", zext>;
   2621 defm Aint_extend : INT_EXTEND <"u", anyext>;
   2622 
   2623 class TRUNC_to1_asm<string sz> {
   2624   string s = !strconcat("{{\n\t",
   2625              !strconcat(".reg ",
   2626              !strconcat(sz,
   2627              !strconcat(" temp;\n\t",
   2628              !strconcat("and",
   2629              !strconcat(sz,
   2630              !strconcat("\t temp, $a, 1;\n\t",
   2631              !strconcat("setp",
   2632              !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
   2633 }
   2634 
   2635 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   2636              "cvt.u32.u64 \t$d, $a;",
   2637        [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
   2638 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
   2639              "cvt.u16.u64 \t$d, $a;",
   2640        [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
   2641 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
   2642              "cvt.u8.u64 \t$d, $a;",
   2643        [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
   2644 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
   2645              "cvt.u16.u32 \t$d, $a;",
   2646        [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
   2647 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
   2648              "cvt.u8.u32 \t$d, $a;",
   2649        [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
   2650 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
   2651              "cvt.u8.u16 \t$d, $a;",
   2652        [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
   2653 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   2654              TRUNC_to1_asm<".b64">.s,
   2655              [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
   2656 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   2657              TRUNC_to1_asm<".b32">.s,
   2658              [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
   2659 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
   2660              TRUNC_to1_asm<".b16">.s,
   2661              [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
   2662 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
   2663              TRUNC_to1_asm<".b16">.s,
   2664              [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
   2665 
   2666 // Select instructions
   2667 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
   2668           (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
   2669 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
   2670           (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
   2671             (TRUNC_32to1 Int32Regs:$pred))>;
   2672 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
   2673           (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
   2674             (TRUNC_32to1 Int32Regs:$pred))>;
   2675 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
   2676           (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
   2677             (TRUNC_32to1 Int32Regs:$pred))>;
   2678 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
   2679           (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
   2680             (TRUNC_32to1 Int32Regs:$pred))>;
   2681 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
   2682           (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
   2683             (TRUNC_32to1 Int32Regs:$pred))>;
   2684 
   2685 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
   2686   NVPTXRegClass regclassOut> :
   2687            NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
   2688            !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
   2689      [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
   2690 
   2691 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
   2692 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
   2693 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
   2694 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
   2695 
   2696 // pack a set of smaller int registers to a larger int register
   2697 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
   2698                           (ins Int8Regs:$s1, Int8Regs:$s2,
   2699                                Int8Regs:$s3, Int8Regs:$s4),
   2700                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
   2701                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
   2702                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
   2703                           !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
   2704                           !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
   2705                            "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
   2706                           []>;
   2707 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
   2708                           (ins Int16Regs:$s1, Int16Regs:$s2,
   2709                                Int16Regs:$s3, Int16Regs:$s4),
   2710                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
   2711                           []>;
   2712 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
   2713                           (ins Int8Regs:$s1, Int8Regs:$s2),
   2714                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
   2715                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
   2716                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
   2717                                      "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
   2718                           []>;
   2719 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
   2720                           (ins Int16Regs:$s1, Int16Regs:$s2),
   2721                           "mov.b32\t$d, {{$s1, $s2}};",
   2722                           []>;
   2723 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
   2724                           (ins Int32Regs:$s1, Int32Regs:$s2),
   2725                           "mov.b64\t$d, {{$s1, $s2}};",
   2726                           []>;
   2727 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
   2728                           (ins Float32Regs:$s1, Float32Regs:$s2),
   2729                           "mov.b64\t$d, {{$s1, $s2}};",
   2730                           []>;
   2731 
   2732 // unpack a larger int register to a set of smaller int registers
   2733 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
   2734                                 Int8Regs:$d3, Int8Regs:$d4),
   2735                           (ins Int32Regs:$s),
   2736                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
   2737                           !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
   2738                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
   2739                           !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
   2740                           !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
   2741                                      "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
   2742                           []>;
   2743 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
   2744                                  Int16Regs:$d3, Int16Regs:$d4),
   2745                            (ins Int64Regs:$s),
   2746                            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
   2747                           []>;
   2748 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
   2749                           (ins Int16Regs:$s),
   2750                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
   2751                           !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
   2752                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
   2753                                      "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
   2754                           []>;
   2755 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
   2756                            (ins Int32Regs:$s),
   2757                            "mov.b32\t{{$d1, $d2}}, $s;",
   2758                           []>;
   2759 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
   2760                            (ins Int64Regs:$s),
   2761                            "mov.b64\t{{$d1, $d2}}, $s;",
   2762                           []>;
   2763 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
   2764                            (ins Float64Regs:$s),
   2765                            "mov.b64\t{{$d1, $d2}}, $s;",
   2766                           []>;
   2767 
   2768 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
   2769             "cvt.rn.ftz.f32.f64 \t$d, $a;",
   2770       [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
   2771 
   2772 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
   2773             "cvt.rn.f32.f64 \t$d, $a;",
   2774       [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
   2775 
   2776 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
   2777             "cvt.ftz.f64.f32 \t$d, $a;",
   2778       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
   2779 
   2780 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
   2781             "cvt.f64.f32 \t$d, $a;",
   2782       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
   2783 
   2784 def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
   2785                            [SDNPHasChain, SDNPOptInGlue]>;
   2786 
   2787 //-----------------------------------
   2788 // Control-flow
   2789 //-----------------------------------
   2790 
   2791 let isTerminator=1 in {
   2792    let isReturn=1, isBarrier=1 in
   2793       def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
   2794 
   2795    let isBranch=1 in
   2796       def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
   2797                           "@$a bra \t$target;",
   2798                            [(brcond Int1Regs:$a, bb:$target)]>;
   2799    let isBranch=1 in
   2800       def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
   2801                           "@!$a bra \t$target;",
   2802                            []>;
   2803 
   2804    let isBranch=1, isBarrier=1 in
   2805       def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
   2806                         "bra.uni \t$target;",
   2807                   [(br bb:$target)]>;
   2808 }
   2809 
   2810 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
   2811     (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
   2812 
   2813 // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
   2814 // conditional branch if
   2815 // the target block is the next block so that the code can fall through to the
   2816 // target block.
   2817 // The invertion is done by 'xor condition, 1', which will be translated to
   2818 // (setne condition, -1).
   2819 // Since ptx supports '@!pred bra target', we should use it.
   2820 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
   2821   (CBranchOther Int1Regs:$a, bb:$target)>;
   2822 
   2823 // Call
   2824 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
   2825 def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
   2826                                         SDTCisVT<1, i32> ]>;
   2827 
   2828 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
   2829                            [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
   2830 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
   2831                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
   2832                            SDNPSideEffect]>;
   2833 
   2834 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
   2835 def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
   2836                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
   2837 def calltarget : Operand<i32>;
   2838 let isCall=1 in {
   2839    def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
   2840                   "call \t$dst, (1);", []>;
   2841 }
   2842 
   2843 def : Pat<(call tglobaladdr:$dst),
   2844           (CALL tglobaladdr:$dst)>;
   2845 def : Pat<(call texternalsym:$dst),
   2846           (CALL texternalsym:$dst)>;
   2847 
   2848 // Pseudo instructions.
   2849 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
   2850    : NVPTXInst<outs, ins, asmstr, pattern>;
   2851 
   2852 // @TODO: We use some tricks here to emit curly braces.  Can we clean this up
   2853 // a bit without TableGen modifications?
   2854 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
   2855   "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
   2856                                [(callseq_start timm:$amt)]>;
   2857 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
   2858   "\n\t//{{\n\t}}// Callseq End $amt1",
   2859                             [(callseq_end timm:$amt1, timm:$amt2)]>;
   2860 
   2861 // trap instruction
   2862 
   2863 def trapinst : NVPTXInst<(outs), (ins),
   2864                          "trap;",
   2865                          [(trap)]>;
   2866 
   2867 include "NVPTXIntrinsics.td"
   2868 
   2869 
   2870 //-----------------------------------
   2871 // Notes
   2872 //-----------------------------------
   2873 // BSWAP is currently expanded. The following is a more efficient
   2874 // - for < sm_20, use vector scalar mov, as tesla support native 16-bit register
   2875 // - for sm_20, use pmpt (use vector scalar mov to get the pack and
   2876 //   unpack). sm_20 supports native 32-bit register, but not native 16-bit
   2877 // register.
   2878