Home | History | Annotate | Download | only in NVPTX
      1 //===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 //-----------------------------------
     11 // Vector Specific
     12 //-----------------------------------
     13 
     14 //
     15 // All vector instructions derive from NVPTXVecInst
     16 //
     17 
     18 class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
     19   NVPTXInst sInst=NOP>
     20   : NVPTXInst<outs, ins, asmstr, pattern> {
     21   NVPTXInst scalarInst=sInst;
     22 }
     23 
     24 let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
     25 // Extract v2i16
     26 def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
     27   (ins V2I16Regs:$src, i8imm:$c),
     28                          "mov.u16 \t$dst, $src${c:vecelem};",
     29                          [(set Int16Regs:$dst, (vector_extract
     30                            (v2i16 V2I16Regs:$src), imm:$c))],
     31                          IMOV16rr>;
     32 
     33 // Extract v4i16
     34 def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
     35   (ins V4I16Regs:$src, i8imm:$c),
     36                          "mov.u16 \t$dst, $src${c:vecelem};",
     37                          [(set Int16Regs:$dst, (vector_extract
     38                            (v4i16 V4I16Regs:$src), imm:$c))],
     39                          IMOV16rr>;
     40 
     41 // Extract v2i8
     42 def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
     43   (ins V2I8Regs:$src, i8imm:$c),
     44                          "mov.u16 \t$dst, $src${c:vecelem};",
     45                          [(set Int8Regs:$dst, (vector_extract
     46                            (v2i8 V2I8Regs:$src), imm:$c))],
     47                          IMOV8rr>;
     48 
     49 // Extract v4i8
     50 def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
     51   (ins V4I8Regs:$src, i8imm:$c),
     52                          "mov.u16 \t$dst, $src${c:vecelem};",
     53                          [(set Int8Regs:$dst, (vector_extract
     54                            (v4i8 V4I8Regs:$src), imm:$c))],
     55                          IMOV8rr>;
     56 
     57 // Extract v2i32
     58 def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
     59   (ins V2I32Regs:$src, i8imm:$c),
     60                          "mov.u32 \t$dst, $src${c:vecelem};",
     61                          [(set Int32Regs:$dst, (vector_extract
     62                            (v2i32 V2I32Regs:$src), imm:$c))],
     63                          IMOV32rr>;
     64 
     65 // Extract v2f32
     66 def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
     67   (ins V2F32Regs:$src, i8imm:$c),
     68                          "mov.f32 \t$dst, $src${c:vecelem};",
     69                          [(set Float32Regs:$dst, (vector_extract
     70                            (v2f32 V2F32Regs:$src), imm:$c))],
     71                          FMOV32rr>;
     72 
     73 // Extract v2i64
     74 def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
     75   (ins V2I64Regs:$src, i8imm:$c),
     76                          "mov.u64 \t$dst, $src${c:vecelem};",
     77                          [(set Int64Regs:$dst, (vector_extract
     78                            (v2i64 V2I64Regs:$src), imm:$c))],
     79                          IMOV64rr>;
     80 
     81 // Extract v2f64
     82 def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
     83   (ins V2F64Regs:$src, i8imm:$c),
     84                          "mov.f64 \t$dst, $src${c:vecelem};",
     85                          [(set Float64Regs:$dst, (vector_extract
     86                            (v2f64 V2F64Regs:$src), imm:$c))],
     87                          FMOV64rr>;
     88 
     89 // Extract v4i32
     90 def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
     91   (ins V4I32Regs:$src, i8imm:$c),
     92                          "mov.u32 \t$dst, $src${c:vecelem};",
     93                          [(set Int32Regs:$dst, (vector_extract
     94                            (v4i32 V4I32Regs:$src), imm:$c))],
     95                          IMOV32rr>;
     96 
     97 // Extract v4f32
     98 def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
     99   (ins V4F32Regs:$src, i8imm:$c),
    100                          "mov.f32 \t$dst, $src${c:vecelem};",
    101                          [(set Float32Regs:$dst, (vector_extract
    102                            (v4f32 V4F32Regs:$src), imm:$c))],
    103                          FMOV32rr>;
    104 }
    105 
    106 let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
    107 // Insert v2i8
    108 def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
    109   (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
    110         "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
    111         "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    112        [(set V2I8Regs:$dst,
    113          (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
    114                          IMOV8rr>;
    115 
    116 // Insert v4i8
    117 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
    118   (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
    119                        "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
    120                        "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    121        [(set V4I8Regs:$dst,
    122          (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
    123                          IMOV8rr>;
    124 
    125 // Insert v2i16
    126 def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
    127   (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
    128                        "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
    129                        "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    130        [(set V2I16Regs:$dst,
    131          (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
    132                          IMOV16rr>;
    133 
    134 // Insert v4i16
    135 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
    136   (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
    137                        "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
    138                        "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
    139        [(set V4I16Regs:$dst,
    140          (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
    141                          IMOV16rr>;
    142 
    143 // Insert v2i32
    144 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
    145   (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
    146                        "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
    147                        "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
    148        [(set V2I32Regs:$dst,
    149          (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
    150                          IMOV32rr>;
    151 
    152 // Insert v2f32
    153 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
    154   (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
    155                        "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
    156                        "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
    157        [(set V2F32Regs:$dst,
    158          (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
    159                          FMOV32rr>;
    160 
    161 // Insert v2i64
    162 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
    163   (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
    164                        "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
    165                        "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
    166        [(set V2I64Regs:$dst,
    167          (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
    168                          IMOV64rr>;
    169 
    170 // Insert v2f64
    171 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
    172   (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
    173                        "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
    174                        "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
    175        [(set V2F64Regs:$dst,
    176          (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
    177                          FMOV64rr>;
    178 
    179 // Insert v4i32
    180 def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
    181   (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
    182                        "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
    183                        "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
    184        [(set V4I32Regs:$dst,
    185          (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
    186                          IMOV32rr>;
    187 
    188 // Insert v4f32
    189 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
    190   (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
    191                        "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
    192                        "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
    193        [(set V4F32Regs:$dst,
    194          (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
    195                          FMOV32rr>;
    196 }
    197 
    198 class BinOpAsmString<string c> {
    199   string s = c;
    200 }
    201 
    202 class V4AsmStr<string opcode> : BinOpAsmString<
    203                           !strconcat(!strconcat(!strconcat(!strconcat(
    204                             !strconcat(!strconcat(!strconcat(
    205                           opcode,  " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
    206                           opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"),
    207                           opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"),
    208                           opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>;
    209 
    210 class V2AsmStr<string opcode> : BinOpAsmString<
    211                            !strconcat(!strconcat(!strconcat(
    212                            opcode,  " \t${dst}_0, ${a}_0, ${b}_0;\n\t"),
    213                            opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>;
    214 
    215 class V4MADStr<string opcode> : BinOpAsmString<
    216                           !strconcat(!strconcat(!strconcat(!strconcat(
    217                             !strconcat(!strconcat(!strconcat(
    218                           opcode,  " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
    219                           opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"),
    220                           opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"),
    221                           opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
    222 
    223 class V2MADStr<string opcode> : BinOpAsmString<
    224                            !strconcat(!strconcat(!strconcat(
    225                            opcode,  " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"),
    226                            opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
    227 
    228 class V4UnaryStr<string opcode> : BinOpAsmString<
    229                           !strconcat(!strconcat(!strconcat(!strconcat(
    230                             !strconcat(!strconcat(!strconcat(
    231                           opcode,  " \t${dst}_0, ${a}_0;\n\t"),
    232                           opcode), " \t${dst}_1, ${a}_1;\n\t"),
    233                           opcode), " \t${dst}_2, ${a}_2;\n\t"),
    234                           opcode), " \t${dst}_3, ${a}_3;")>;
    235 
    236 class V2UnaryStr<string opcode> : BinOpAsmString<
    237                            !strconcat(!strconcat(!strconcat(
    238                            opcode,  " \t${dst}_0, ${a}_0;\n\t"),
    239                            opcode), " \t${dst}_1, ${a}_1;")>;
    240 
    241 class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
    242   NVPTXInst sInst=NOP> :
    243       NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
    244                  asmstr.s,
    245                  [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
    246                  sInst>;
    247 
    248 class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
    249                  NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
    250       NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
    251                  asmstr.s,
    252                  [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
    253                  sInst>;
    254 
    255 class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
    256   NVPTXInst sInst=NOP> :
    257       NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
    258                  asmstr.s,
    259                  [(set regclass:$dst, (OpNode regclass:$a))], sInst>;
    260 
    261 multiclass IntBinVOp<string asmstr, SDNode OpNode,
    262                      NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
    263                      i16op=NOP, NVPTXInst i8op=NOP> {
    264   def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
    265     i64op>;
    266   def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
    267     i32op>;
    268   def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
    269     i32op>;
    270   def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
    271     i16op>;
    272   def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
    273     i16op>;
    274   def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
    275     i8op>;
    276   def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
    277     i8op>;
    278 }
    279 
    280 multiclass FloatBinVOp<string asmstr, SDNode OpNode,
    281                        NVPTXInst f64=NOP, NVPTXInst f32=NOP,
    282                        NVPTXInst f32_ftz=NOP> {
    283   def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
    284     V2F64Regs, f64>;
    285   def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
    286     V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
    287   def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
    288     V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
    289   def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
    290     V4F32Regs, f32>;
    291   def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
    292     V2F32Regs, f32>;
    293 }
    294 
    295 multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
    296                        NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
    297                        NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
    298   def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
    299     V2I64Regs, i64op>;
    300   def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
    301     V4I32Regs, i32op>;
    302   def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
    303     V2I32Regs, i32op>;
    304   def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
    305     V4I16Regs, i16op>;
    306   def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
    307     V2I16Regs, i16op>;
    308   def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
    309     V4I8Regs,   i8op>;
    310   def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
    311     V2I8Regs,   i8op>;
    312 }
    313 
    314 
    315 // Integer Arithmetic
    316 let VecInstType=isVecOther.Value in {
    317 defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
    318 defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;
    319 
    320 def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
    321   ADDCCi32rr>;
    322 def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
    323   ADDCCi32rr>;
    324 def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
    325   SUBCCi32rr>;
    326 def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
    327   SUBCCi32rr>;
    328 def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
    329   ADDCCCi32rr>;
    330 def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
    331   ADDCCCi32rr>;
    332 def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
    333   SUBCCCi32rr>;
    334 def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
    335   SUBCCCi32rr>;
    336 
    337 def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
    338   SHLi64rr>;
    339 def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
    340   SHLi32rr>;
    341 def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
    342   SHLi32rr>;
    343 def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
    344   SHLi16rr>;
    345 def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
    346   SHLi16rr>;
    347 def ShiftLV2I8  : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs,  V2I32Regs,
    348   SHLi8rr>;
    349 def ShiftLV4I8  : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs,  V4I32Regs,
    350   SHLi8rr>;
    351 }
    352 
    353 // cvt to v*i32, helpers for shift
    354 class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
    355   NVPTXInst sInst=NOP> :
    356       NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
    357 
    358 class VecCVTStrHelper<string op, string dest, string src> {
    359   string s=!strconcat(op, !strconcat("\t",
    360            !strconcat(dest, !strconcat(", ", !strconcat(src, ";")))));
    361 }
    362 
    363 class Vec2CVTStr<string op> {
    364   string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
    365            !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s));
    366 }
    367 
    368 class Vec4CVTStr<string op> {
    369   string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
    370            !strconcat("\n\t",
    371            !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
    372            !strconcat("\n\t",
    373            !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
    374            !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s))))));
    375 }
    376 
    377 let VecInstType=isVecOther.Value in {
    378 def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
    379   Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
    380 def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
    381   Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
    382 def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
    383   Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
    384 def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
    385   Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
    386 def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
    387   Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
    388 }
    389 
    390 def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
    391           (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
    392 def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
    393           (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
    394 def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
    395           (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
    396 
    397 def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
    398           (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
    399 def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
    400           (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
    401 
    402 let VecInstType=isVecOther.Value in {
    403 def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
    404   SRAi64rr>;
    405 def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
    406   SRAi32rr>;
    407 def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
    408   SRAi32rr>;
    409 def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
    410   SRAi16rr>;
    411 def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
    412   SRAi16rr>;
    413 def ShiftRAV2I8  : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs,  V2I32Regs,
    414   SRAi8rr>;
    415 def ShiftRAV4I8  : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs,  V4I32Regs,
    416   SRAi8rr>;
    417 
    418 def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
    419   SRLi64rr>;
    420 def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
    421   SRLi32rr>;
    422 def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
    423   SRLi32rr>;
    424 def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
    425   SRLi16rr>;
    426 def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
    427   SRLi16rr>;
    428 def ShiftRLV2I8  : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs,  V2I32Regs,
    429   SRLi8rr>;
    430 def ShiftRLV4I8  : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs,  V4I32Regs,
    431   SRLi8rr>;
    432 
    433 defm VMult   : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
    434   MULTi8rr>;
    435 defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
    436   MULTHSi16rr,
    437   MULTHSi8rr>;
    438 defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
    439   MULTHUi16rr,
    440   MULTHUi8rr>;
    441 defm VSDiv   : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
    442   SDIVi8rr>;
    443 defm VUDiv   : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
    444   UDIVi8rr>;
    445 defm VSRem   : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
    446   SREMi8rr>;
    447 defm VURem   : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
    448   UREMi8rr>;
    449 }
    450 
    451 def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
    452           (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
    453 def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
    454           (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
    455 def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
    456           (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
    457 
    458 def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
    459           (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
    460 def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
    461           (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
    462 
    463 def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
    464           (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
    465 def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
    466           (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
    467 def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
    468           (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;
    469 
    470 def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
    471           (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
    472 def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
    473           (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;
    474 
    475 multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
    476   NVPTXRegClass regclassv2,
    477                 SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
    478                 Predicate Pred> {
    479   def V4 : NVPTXVecInst<(outs regclassv4:$dst),
    480     (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
    481                       V4MADStr<asmstr>.s,
    482                       [(set regclassv4:$dst,
    483                         (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
    484                       sop>,
    485            Requires<[Pred]>;
    486   def V2 : NVPTXVecInst<(outs regclassv2:$dst),
    487     (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
    488                       V2MADStr<asmstr>.s,
    489                       [(set regclassv2:$dst,
    490                         (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
    491                       sop>,
    492            Requires<[Pred]>;
    493 }
    494 
    495 multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
    496   Predicate Pred> {
    497   def V2 : NVPTXVecInst<(outs regclass:$dst),
    498     (ins regclass:$a, regclass:$b, regclass:$c),
    499                       V2MADStr<asmstr>.s,
    500                       [(set regclass:$dst, (add
    501                         (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
    502            Requires<[Pred]>;
    503 }
    504 multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
    505   Predicate Pred> {
    506   def V2 : NVPTXVecInst<(outs regclass:$dst),
    507     (ins regclass:$a, regclass:$b, regclass:$c),
    508                       V2MADStr<asmstr>.s,
    509                       [(set regclass:$dst, (fadd
    510                         (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
    511            Requires<[Pred]>;
    512 }
    513 
    514 let VecInstType=isVecOther.Value in {
    515 defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
    516 defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
    517   true>;
    518 defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
    519   true>;
    520 defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;
    521 
    522 defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;
    523 
    524 defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
    525 defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
    526 defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;
    527 
    528 defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
    529   FMAD32_ftzrrr, doFMADF32_ftz>;
    530 defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
    531   FMA32_ftzrrr, doFMAF32_ftz>;
    532 defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
    533   doFMADF32>;
    534 defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
    535   doFMAF32>;
    536 defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
    537 }
    538 
    539 let VecInstType=isVecOther.Value in {
    540 def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
    541   FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
    542 def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
    543   FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
    544 def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
    545   FDIV32rr_prec>, Requires<[reqPTX20]>;
    546 def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
    547   FDIV32rr_prec>, Requires<[reqPTX20]>;
    548 def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
    549   FDIV32rr_ftz>, Requires<[doF32FTZ]>;
    550 def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
    551   FDIV32rr_ftz>, Requires<[doF32FTZ]>;
    552 def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
    553 def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
    554 def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
    555 }
    556 
    557 def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;
    558 
    559 let VecInstType=isVecOther.Value in {
    560 def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
    561   FNEGf32_ftz>, Requires<[doF32FTZ]>;
    562 def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
    563   FNEGf32_ftz>, Requires<[doF32FTZ]>;
    564 def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
    565 def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
    566 def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;
    567 
    568 // Logical Arithmetic
    569 defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
    570 defm VOr  : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
    571 defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;
    572 
    573 defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
    574 }
    575 
    576 
    577 multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
    578   def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
    579           (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c,  V2F32Regs:$a)>,
    580           Requires<[Pred]>;
    581 
    582   def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
    583           (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
    584           Requires<[Pred]>;
    585 }
    586 
    587 defm V2FMAF32ext_ftz  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
    588 defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
    589 defm V2FMAF32ext  : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
    590 defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
    591 
    592 multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
    593   def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
    594           (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c,  V4F32Regs:$a)>,
    595           Requires<[Pred]>;
    596 
    597   def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
    598           (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
    599           Requires<[Pred]>;
    600 }
    601 
    602 defm V4FMAF32ext_ftz  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
    603 defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
    604 defm V4FMAF32ext  : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
    605 defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
    606 
    607 multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
    608   def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
    609           (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
    610           Requires<[Pred]>;
    611 
    612   def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
    613           (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
    614           Requires<[Pred]>;
    615 }
    616 
    617 defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
    618 
    // Builds, in field s, an asm operand reference with a modifier:
    //   "${c" # elem # ":vecv" # vecsize # extra # l # "}"
    // For example VecModStr<"4", "0", "comm", "1">.s is "${c0:vecv4comm1}".
    // t1..t5 are intermediate concatenation steps; l defaults to "".
    // How the modifier is interpreted is up to the asm printer (not shown here).
    619 class VecModStr<string vecsize, string elem, string extra, string l="">
    620 {
    621   string t1 = !strconcat("${c", elem);
    622   string t2 = !strconcat(t1, ":vecv");
    623   string t3 = !strconcat(t2, vecsize);
    624   string t4 = !strconcat(t3, extra);
    625   string t5 = !strconcat(t4, l);
    626   string s =  !strconcat(t5, "}");
    627 }
    // Builds, in field s, the asm text for one destination element (elem) of a
    // shuffle. Two move lines are produced, separated by ";\n\t":
    //   <comm1-modifier>mov.<type> \t${dst}_<elem>, $src1<pos-modifier>;
    //   <comm2-modifier>mov.<type> \t${dst}_<elem>, $src2<pos-modifier>;
    // where each modifier is a VecModStr on mask operand $c<elem>.
    // NOTE(review): presumably the "comm1"/"comm2" modifiers print a comment
    // marker so that only the move from the source selected by the mask is
    // live, and "pos" prints the source element suffix -- confirm in the asm
    // printer.
    628 class ShuffleOneLine<string vecsize, string elem, string type>
    629 {
    630   string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
    631   string t2 = !strconcat(t1, "mov.");
    632   string t3 = !strconcat(t2, type);
    633   string t4 = !strconcat(t3, " \t${dst}_");
    634   string t5 = !strconcat(t4, elem);
    635   string t6 = !strconcat(t5, ", $src1");
    636   string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
    637   string t8 = !strconcat(t7, ";\n\t");
    638   string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
    639   string t10 = !strconcat(t9, "mov.");
    640   string t11 = !strconcat(t10, type);
    641   string t12 = !strconcat(t11, " \t${dst}_");
    642   string t13 = !strconcat(t12, elem);
    643   string t14 = !strconcat(t13, ", $src2");
    644   string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
    645   string s =   !strconcat(t15, ";");
    646 }
    // Asm text for a 2-element shuffle of the given PTX type: the ShuffleOneLine
    // strings for elements "0" and "1", joined by "\n\t". Result is in field s.
    647 class ShuffleAsmStr2<string type>
    648 {
    649   string t1 = ShuffleOneLine<"2", "0", type>.s;
    650   string t2 = !strconcat(t1, "\n\t");
    651   string s  = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
    652 }
    // Asm text for a 4-element shuffle of the given PTX type: the ShuffleOneLine
    // strings for elements "0" through "3", joined by "\n\t". Result is in
    // field s.
    653 class ShuffleAsmStr4<string type>
    654 {
    655   string t1 = ShuffleOneLine<"4", "0", type>.s;
    656   string t2 = !strconcat(t1, "\n\t");
    657   string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
    658   string t4 = !strconcat(t3, "\n\t");
    659   string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
    660   string t6 = !strconcat(t5, "\n\t");
    661   string s  = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
    662 }
    663 
    // Two-source vector shuffle instructions, one per vector register class.
    // Every def in this block gets neverHasSideEffects=1 and
    // VecInstType=isVecShuffle.Value. Operands are the two source vectors plus
    // one i8imm mask operand per result element ($c0..$c3 for 4-element types,
    // $c0..$c1 for 2-element types). The pattern list is empty; selection is
    // done by the separate Pat<> records in this file. The asm string is a
    // "//Mov ..." line followed by the per-element moves from ShuffleAsmStr2 /
    // ShuffleAsmStr4, and the last template argument is the scalar move stored
    // in scalarInst. Note that the i8 vector forms are printed with "u16" moves
    // (same type string as the i16 forms).
    664 let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
    665 def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
    666                        (ins  V4F32Regs:$src1, V4F32Regs:$src2,
    667                              i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    668                  !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
    669                                  ShuffleAsmStr4<"f32">.s),
    670                        [], FMOV32rr>;
    671 
    672 def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
    673                        (ins  V4I32Regs:$src1, V4I32Regs:$src2,
    674                              i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    675                  !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
    676                                  ShuffleAsmStr4<"u32">.s),
    677                        [], IMOV32rr>;
    678 
    679 def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
    680                        (ins  V4I16Regs:$src1, V4I16Regs:$src2,
    681                              i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    682                  !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
    683                                  ShuffleAsmStr4<"u16">.s),
    684                        [], IMOV16rr>;
    685 
    686 def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
    687                        (ins  V4I8Regs:$src1, V4I8Regs:$src2,
    688                              i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
    689                  !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
    690                                  ShuffleAsmStr4<"u16">.s),
    691                        [], IMOV8rr>;
    692 
    693 def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
    694                        (ins  V2F32Regs:$src1, V2F32Regs:$src2,
    695                              i8imm:$c0, i8imm:$c1),
    696                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    697                                  ShuffleAsmStr2<"f32">.s),
    698                        [], FMOV32rr>;
    699 
    700 def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
    701                        (ins  V2I32Regs:$src1, V2I32Regs:$src2,
    702                              i8imm:$c0, i8imm:$c1),
    703                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    704                                  ShuffleAsmStr2<"u32">.s),
    705                        [], IMOV32rr>;
    706 
    707 def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
    708                        (ins  V2I8Regs:$src1, V2I8Regs:$src2,
    709                              i8imm:$c0, i8imm:$c1),
    710                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    711                                  ShuffleAsmStr2<"u16">.s),
    712                        [], IMOV8rr>;
    713 
    714 def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
    715                        (ins  V2I16Regs:$src1, V2I16Regs:$src2,
    716                              i8imm:$c0, i8imm:$c1),
    717                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    718                                  ShuffleAsmStr2<"u16">.s),
    719                        [], IMOV16rr>;
    720 
    721 def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
    722                        (ins  V2F64Regs:$src1, V2F64Regs:$src2,
    723                              i8imm:$c0, i8imm:$c1),
    724                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    725                                  ShuffleAsmStr2<"f64">.s),
    726                        [], FMOV64rr>;
    727 
    728 def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
    729                        (ins  V2I64Regs:$src1, V2I64Regs:$src2,
    730                              i8imm:$c0, i8imm:$c1),
    731                        !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
    732                                  ShuffleAsmStr2<"u64">.s),
    733                        [], IMOV64rr>;
    734 }
    735 
    // Node transforms for vector_shuffle: ShuffleMaskK casts the matched node N
    // to ShuffleVectorSDNode and returns mask element K as a target constant of
    // type MVT::i32.
    // NOTE(review): the VecShuffle_* instructions declare their mask operands as
    // i8imm while these constants are built as i32 -- confirm this mismatch is
    // intentional/harmless.
    736 def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
    737   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    738   return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
    739 }]>;
    740 def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
    741   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    742   return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
    743 }]>;
    744 def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
    745   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    746   return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
    747 }]>;
    748 def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
    749   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    750   return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
    751 }]>;
    752 
    // Pattern fragment matching any two-operand vector_shuffle. The predicate
    // always returns true, so the fragment accepts every vector_shuffle node.
    753 // The spurious call is here to silence a compiler warning about N being
    754 // unused.
    755 def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs),
    756                        (vector_shuffle node:$lhs, node:$rhs),
    757                        [{ N->getGluedNode(); return true; }]>;
    758 
    // Selection patterns for vector_shuffle, one per vector type. The matched
    // shuffle node is bound to $op; each pattern emits the corresponding
    // VecShuffle_* instruction with both sources and one ShuffleMaskK transform
    // of $op per result element (K = 0..1 for 2-element types, 0..3 for
    // 4-element types).
    759 def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
    760           (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
    761                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    762 
    763 def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
    764           (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
    765                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
    766                             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
    767 
    768 def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
    769           (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
    770                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    771 
    772 def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
    773           (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
    774                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    775 
    776 def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
    777           (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
    778                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
    779                             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
    780 
    781 def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
    782           (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
    783                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    784 
    785 def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
    786           (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
    787                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
    788                             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
    789 
    790 def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
    791           (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
    792                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    793 
    794 def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
    795           (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
    796                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
    797                             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;
    798 
    799 def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
    800           (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
    801                             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
    802 
    // Instruction class that builds a 2-element vector (register class vclass)
    // from two scalars of class sclass, matching build_vector. asmstr is the
    // opcode text; the operand text "\t${dst:vecfull}, {{$a1, $a2}};" is
    // appended to it. si is stored as the scalarInst of the instruction.
    803 class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
    804   NVPTXInst si>
    805                    : NVPTXVecInst<(outs vclass:$dst),
    806                    (ins  sclass:$a1, sclass:$a2),
    807                    !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
    808                    [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
    809                    si>;
    // Instruction class that builds a 4-element vector (register class vclass)
    // from four scalars of class sclass, matching build_vector. asmstr is the
    // opcode text; "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};" is appended to
    // it. si is stored as the scalarInst of the instruction.
    810 class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
    811   NVPTXInst si>
    812                    : NVPTXVecInst<(outs vclass:$dst),
    813                    (ins  sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
    814                !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
    815                    [(set vclass:$dst,
    816                      (build_vector sclass:$a1, sclass:$a2,
    817                        sclass:$a3, sclass:$a4))], si>;
    818 
    // Concrete build_vector instructions for every 2- and 4-element vector
    // type. All are marked isAsCheapAsAMove=1 with
    // VecInstType=isVecBuild.Value. The i8 vector forms use the "u16" opcode
    // text, like the i16 forms, but keep IMOV8rr as their scalar instruction.
    819 let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
    820 def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs,
    821   FMOV32rr>;
    822 def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs,
    823   FMOV64rr>;
    824 
    825 def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs,
    826   IMOV32rr>;
    827 def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs,
    828   IMOV64rr>;
    829 def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs,
    830   IMOV16rr>;
    831 def Build_Vector2_i8  : Build_Vector2<"mov.v2.u16",  V2I8Regs,  Int8Regs,
    832   IMOV8rr>;
    833 
    834 def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs,
    835   FMOV32rr>;
    836 
    837 def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs,
    838   IMOV32rr>;
    839 def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs,
    840   IMOV16rr>;
    841 def Build_Vector4_i8  : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs,
    842   IMOV8rr>;
    843 }
    844 
    // Whole-vector register-to-register move for register class vclass. The asm
    // string is asmstr followed by "\t${dst:vecfull}, ${src:vecfull};". There is
    // no selection pattern; sop (default NOP) is stored as scalarInst.
    845 class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
    846                  : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src),
    847                    !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
    848                    [], sop>;
    849 
    // Vector move instructions for every vector register class. All are flagged
    // isAsCheapAsAMove, neverHasSideEffects and IsSimpleMove, with
    // VecInstType=isVecOther.Value. The i8 vector moves use the "u16" opcode
    // text and IMOV8rr as scalar instruction.
    850 let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
    851   VecInstType=isVecOther.Value in {
    852 def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
    853 def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
    854 
    855 def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
    856 def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;
    857 
    858 def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
    859 def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;
    860 
    861 def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
    862 def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;
    863 
    864 def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
    865 def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
    866 }
    867 
    // extract_subvec is a local SDNode for ISD::EXTRACT_SUBVECTOR (one result,
    // two operands, operand 2 constrained to the pointer type). The patterns
    // below cover only the two halves of a 4-element vector (start index 0 or
    // 2): each half is produced by extracting its two scalars with V4*Extract
    // and rebuilding a 2-element vector with Build_Vector2_*. No pattern is
    // given for any other start index.
    868 // extract subvector patterns
    869 def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR",
    870                         SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
    871 
    872 def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
    873                  (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
    874                                     (V4f32Extract V4F32Regs:$src, 1))>;
    875 def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
    876                  (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
    877                                     (V4f32Extract V4F32Regs:$src, 3))>;
    878 def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
    879                  (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
    880                                     (V4i32Extract V4I32Regs:$src, 1))>;
    881 def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
    882                  (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
    883                                     (V4i32Extract V4I32Regs:$src, 3))>;
    884 def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
    885                  (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
    886                                     (V4i16Extract V4I16Regs:$src, 1))>;
    887 def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
    888                  (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
    889                                     (V4i16Extract V4I16Regs:$src, 3))>;
    890 def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
    891                  (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
    892                                     (V4i8Extract V4I8Regs:$src, 1))>;
    893 def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
    894                  (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
    895                                     (V4i8Extract V4I8Regs:$src, 3))>;
    896 
    897 // Select instructions
    // Builds, in field s, the asm text selecting one vector element:
    //   "selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;"
    // t1..t7 are intermediate concatenation steps.
    898 class Select_OneLine<string type, string pos> {
    899   string t1 = !strconcat("selp.", type);
    900   string t2 = !strconcat(t1, " \t${dst}_");
    901   string t3 = !strconcat(t2, pos);
    902   string t4 = !strconcat(t3, ", ${src1}_");
    903   string t5 = !strconcat(t4, pos);
    904   string t6 = !strconcat(t5, ", ${src2}_");
    905   string t7 = !strconcat(t6, pos);
    906   string s  = !strconcat(t7, ", $p;");
    907 }
    908 
    // Asm text for a 2-element vector select: the Select_OneLine strings for
    // positions "0" and "1", joined by "\n\t". Result is in field s.
    909 class Select_Str2<string type> {
    910   string t1 = Select_OneLine<type, "0">.s;
    911   string t2 = !strconcat(t1, "\n\t");
    912   string s  = !strconcat(t2, Select_OneLine<type, "1">.s);
    913 }
    914 
    // Asm text for a 4-element vector select: the Select_OneLine strings for
    // positions "0" through "3", joined by "\n\t". Result is in field s.
    915 class Select_Str4<string type> {
    916   string t1 = Select_OneLine<type, "0">.s;
    917   string t2 = !strconcat(t1, "\n\t");
    918   string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
    919   string t4 = !strconcat(t3, "\n\t");
    920   string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
    921   string t6 = !strconcat(t5, "\n\t");
    922   string s  = !strconcat(t6, Select_OneLine<type, "3">.s);
    923 
    924 }
    925 
    // Vector select instruction for register class vclass: matches
    // (select $p, $src1, $src2) where the single Int1Regs predicate $p applies
    // to the whole vector. asmstr is used verbatim as the asm string and sop is
    // stored as scalarInst.
    926 class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
    927       : NVPTXVecInst<(outs vclass:$dst),
    928                      (ins  vclass:$src1, vclass:$src2, Int1Regs:$p),
    929                      asmstr,
    930                      [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1,
    931                        vclass:$src2))],
    932                      sop>;
    933 
    // Vector select instructions for every vector register class, all tagged
    // VecInstType=isVecOther.Value. Integer forms use untyped "bNN" selp types,
    // floating-point forms use "fNN". The i8 vector forms use "b16" (same as the
    // i16 forms) with SELECTi8rr as scalar instruction.
    934 let VecInstType=isVecOther.Value in {
    935 def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
    936 def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
    937 def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
    938 def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
    939 def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
    940 def V4I8_Select  : Vec_Select<V4I8Regs,  Select_Str4<"b16">.s, SELECTi8rr>;
    941 def V2I8_Select  : Vec_Select<V2I8Regs,  Select_Str2<"b16">.s, SELECTi8rr>;
    942 
    943 def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
    944 def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
    945 def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
    946 }
    947 
    948 // Comparison instructions
    949 
    // Each fragment below is a two-operand setcc with the condition code fixed
    // to the one named in the fragment (vset<cc> uses SET<CC>). They are used as
    // the `op` argument of the Vec_Compare instruction classes.
    950 // setcc convenience fragments.
    951 def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
    952                       (setcc node:$lhs, node:$rhs, SETOEQ)>;
    953 def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
    954                       (setcc node:$lhs, node:$rhs, SETOGT)>;
    955 def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
    956                       (setcc node:$lhs, node:$rhs, SETOGE)>;
    957 def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
    958                       (setcc node:$lhs, node:$rhs, SETOLT)>;
    959 def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
    960                       (setcc node:$lhs, node:$rhs, SETOLE)>;
    961 def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
    962                       (setcc node:$lhs, node:$rhs, SETONE)>;
    963 def vseto   : PatFrag<(ops node:$lhs, node:$rhs),
    964                       (setcc node:$lhs, node:$rhs, SETO)>;
    965 def vsetuo  : PatFrag<(ops node:$lhs, node:$rhs),
    966                       (setcc node:$lhs, node:$rhs, SETUO)>;
    967 def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
    968                       (setcc node:$lhs, node:$rhs, SETUEQ)>;
    969 def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
    970                       (setcc node:$lhs, node:$rhs, SETUGT)>;
    971 def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
    972                       (setcc node:$lhs, node:$rhs, SETUGE)>;
    973 def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
    974                       (setcc node:$lhs, node:$rhs, SETULT)>;
    975 def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
    976                       (setcc node:$lhs, node:$rhs, SETULE)>;
    977 def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
    978                       (setcc node:$lhs, node:$rhs, SETUNE)>;
    979 def vseteq  : PatFrag<(ops node:$lhs, node:$rhs),
    980                       (setcc node:$lhs, node:$rhs, SETEQ)>;
    981 def vsetgt  : PatFrag<(ops node:$lhs, node:$rhs),
    982                       (setcc node:$lhs, node:$rhs, SETGT)>;
    983 def vsetge  : PatFrag<(ops node:$lhs, node:$rhs),
    984                       (setcc node:$lhs, node:$rhs, SETGE)>;
    985 def vsetlt  : PatFrag<(ops node:$lhs, node:$rhs),
    986                       (setcc node:$lhs, node:$rhs, SETLT)>;
    987 def vsetle  : PatFrag<(ops node:$lhs, node:$rhs),
    988                       (setcc node:$lhs, node:$rhs, SETLE)>;
    989 def vsetne  : PatFrag<(ops node:$lhs, node:$rhs),
    990                       (setcc node:$lhs, node:$rhs, SETNE)>;
    991 
    // Vector compare instruction: matches (op $a, $b) on inrclass operands and
    // produces an outrclass result. sop is stored as scalarInst. The asm string
    // is the literal "Unsupported".
    // NOTE(review): presumably these instructions are always rewritten to their
    // scalar form before printing, so the placeholder text is never emitted --
    // confirm.
    992 class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
    993   NVPTXInst sop>
    994     : NVPTXVecInst<(outs outrclass:$dst),
    995                    (ins  inrclass:$a, inrclass:$b),
    996                    "Unsupported",
    997                    [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
    998                    sop>;
    999 
   // Defines the integer vector compares for one condition `op`: one
   // Vec_Compare per integer vector type, with the result in the same register
   // class as the operands. inst8/inst16/inst32/inst64 are the scalar
   // instructions for the matching element widths.
   1000 multiclass Vec_Compare_All<PatFrag op,
   1001                            NVPTXInst inst8,
   1002                            NVPTXInst inst16,
   1003                            NVPTXInst inst32,
   1004                            NVPTXInst inst64>
   1005 {
   1006   def  V2I8 : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
   1007   def  V4I8 : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
   1008   def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
   1009   def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
   1010   def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
   1011   def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
   1012   def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
   1013 }
   1014 
   // Integer vector compare instructions, all tagged
   // VecInstType=isVecOther.Value. Each defm pairs one setcc fragment with the
   // ISet<CC>i<N>rr_toi<N> scalar instructions of the same condition for
   // 8/16/32/64-bit elements. The "S" names use the plain fragments
   // (vsetgt, vsetlt, ...), the "U" names use the vsetu* fragments.
   1015 let VecInstType=isVecOther.Value in {
   1016   defm VecSGT : Vec_Compare_All<vsetgt,  ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
   1017     ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
   1018   defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
   1019     ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
   1020   defm VecSLT : Vec_Compare_All<vsetlt,  ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
   1021     ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
   1022   defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16,
   1023     ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
   1024   defm VecSGE : Vec_Compare_All<vsetge,  ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
   1025     ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
   1026   defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
   1027     ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
   1028   defm VecSLE : Vec_Compare_All<vsetle,  ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
   1029     ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
   1030   defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16,
   1031     ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
   1032   defm VecSEQ : Vec_Compare_All<vseteq,  ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
   1033     ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
   1034   defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
   1035     ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
   1036   defm VecSNE : Vec_Compare_All<vsetne,  ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
   1037     ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
   1038   defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
   1039     ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
   1040 }
   1041 
   // Defines the floating-point vector compares for one condition `op`. The
   // result is an integer vector with the same element count and element width
   // as the operands (V2F32 -> V2I32, V4F32 -> V4I32, V2F64 -> V2I64).
   // instf32/instf64 are the scalar instructions for the two element widths.
   1042 multiclass FVec_Compare_All<PatFrag op,
   1043                             NVPTXInst instf32,
   1044                             NVPTXInst instf64>
   1045 {
   1046   def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
   1047   def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
   1048   def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
   1049 }
   1050 
   // Floating-point vector compare instructions, all tagged
   // VecInstType=isVecOther.Value, in three groups:
   //  - FVecGT..FVecNE use the ordered fragments (vseto<cc>);
   //  - FVecUGT..FVecUNE use the unordered fragments (vsetu<cc>);
   //  - FVecNUM / FVecNAN use vseto / vsetuo.
   // Each is paired with the FSet<CC>f32rr_toi32 / FSet<CC>f64rr_toi64 scalar
   // instructions of the same name.
   1051 let VecInstType=isVecOther.Value in {
   1052   defm FVecGT :  FVec_Compare_All<vsetogt, FSetGTf32rr_toi32,
   1053     FSetGTf64rr_toi64>;
   1054   defm FVecLT :  FVec_Compare_All<vsetolt, FSetLTf32rr_toi32,
   1055     FSetLTf64rr_toi64>;
   1056   defm FVecGE :  FVec_Compare_All<vsetoge, FSetGEf32rr_toi32,
   1057     FSetGEf64rr_toi64>;
   1058   defm FVecLE :  FVec_Compare_All<vsetole, FSetLEf32rr_toi32,
   1059     FSetLEf64rr_toi64>;
   1060   defm FVecEQ :  FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32,
   1061     FSetEQf64rr_toi64>;
   1062   defm FVecNE :  FVec_Compare_All<vsetone, FSetNEf32rr_toi32,
   1063     FSetNEf64rr_toi64>;
   1064 
   1065   defm FVecUGT :  FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32,
   1066     FSetUGTf64rr_toi64>;
   1067   defm FVecULT :  FVec_Compare_All<vsetult, FSetULTf32rr_toi32,
   1068     FSetULTf64rr_toi64>;
   1069   defm FVecUGE :  FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32,
   1070     FSetUGEf64rr_toi64>;
   1071   defm FVecULE :  FVec_Compare_All<vsetule, FSetULEf32rr_toi32,
   1072     FSetULEf64rr_toi64>;
   1073   defm FVecUEQ :  FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32,
   1074     FSetUEQf64rr_toi64>;
   1075   defm FVecUNE :  FVec_Compare_All<vsetune, FSetUNEf32rr_toi32,
   1076     FSetUNEf64rr_toi64>;
   1077 
   1078   defm FVecNUM :  FVec_Compare_All<vseto,  FSetNUMf32rr_toi32,
   1079     FSetNUMf64rr_toi64>;
   1080   defm FVecNAN :  FVec_Compare_All<vsetuo, FSetNANf32rr_toi32,
   1081     FSetNANf64rr_toi64>;
   1082 }
   1083 
   // Scalarized vector parameter loads: one instruction that defines 4 (or 2)
   // scalar registers of class regclass. The asm string is
   //   "ld.param" # opstr # "\t{{$d1, ...}}, [retval0+$b];"
   // so opstr supplies the ".vN.type" suffix. Operand $a is declared but is not
   // referenced by the asm string. There is no selection pattern.
   1084 class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
   1085       NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
   1086                 (ins i32imm:$a, i32imm:$b),
   1087                 !strconcat(!strconcat("ld.param", opstr),
   1088                   "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>;
   1089 
   1090 class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
   1091       NVPTXInst<(outs regclass:$d1, regclass:$d2),
   1092                 (ins i32imm:$a, i32imm:$b),
   1093                 !strconcat(!strconcat("ld.param", opstr),
   1094                   "\t{{$d1, $d2}}, [retval0+$b];"), []>;
   1095 
   1096 
   // Scalarized vector parameter stores: one instruction that stores 4 (or 2)
   // scalar registers of class regclass. The asm string is
   //   "st.param" # opstr # "\t[param$a+$b], {{$s1, ...}};"
   // so $a is printed directly after "param" and $b is the added offset. No
   // outputs and no selection pattern.
   1097 class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> :
   1098       NVPTXInst<(outs),
   1099                 (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
   1100                   i32imm:$a, i32imm:$b),
   1101                 !strconcat(!strconcat("st.param", opstr),
   1102                   "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>;
   1103 
   1104 class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> :
   1105       NVPTXInst<(outs),
   1106                 (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
   1107                 !strconcat(!strconcat("st.param", opstr),
   1108                   "\t[param$a+$b], {{$s1, $s2}};"), []>;
   1109 
   // Scalarized vector return-value stores: one instruction that stores 4 (or
   // 2) scalar registers of class regclass. The asm string is
   //   "st.param" # opstr # "\t[func_retval+$a], {{$s1, ...}};"
   // with $a as the offset added to func_retval. No outputs and no selection
   // pattern.
   1110 class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> :
   1111       NVPTXInst<(outs),
   1112                 (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
   1113                   i32imm:$a),
   1114                 !strconcat(!strconcat("st.param", opstr),
   1115                   "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>;
   1116 
   1117 class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> :
   1118       NVPTXInst<(outs),
   1119                 (ins regclass:$s1, regclass:$s2, i32imm:$a),
   1120                 !strconcat(!strconcat("st.param", opstr),
   1121                   "\t[func_retval+$a], {{$s1, $s2}};"), []>;
   1122 
   // Scalarized ld.param instructions, used as the scalarInst of the
   // LoadParamV* vector instructions in this file. The destination register
   // class of each def matches the element width in its opcode suffix, the
   // same way the StoreParamScalar* and StoreRetvalScalar* defs do. The
   // 2-element I64/I16/I8 forms previously all used Int32Regs, which
   // disagreed with their .b64/.b16/.b8 opcodes and with the 4-element loads.
   1123 def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
   1124 def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
   1125 def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
   1126 
   1127 def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
   1128 def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
   1129 def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
   1130 def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
   1131 
   1132 def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
   1133 def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
   1134 def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
   1135 
   // Scalarized st.param instructions for outgoing call parameters
   // (StoreParamScalar*) and for function return values (StoreRetvalScalar*).
   // Each def pairs the scalar register class with the ".vN.type" opcode suffix
   // of the same element width; they are used as the scalarInst of the
   // StoreParamV* / StoreRetvalV* vector instructions in this file.
   1136 def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
   1137 def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
   1138 def StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
   1139 
   1140 def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
   1141 def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
   1142 def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
   1143 def StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
   1144 
   1145 def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
   1146 def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
   1147 def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
   1148 
   1149 def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
   1150 def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
   1151 def StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
   1152 
   1153 def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
   1154 def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
   1155 def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
   1156 def StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
   1157 
   1158 def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
   1159 def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
   1160 def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
   1161 
   // Vector-register form of a parameter load: matches
   // (LoadParam (i32 imm:$a), (i32 imm:$b)) producing a regclass vector. The
   // asm string is the fixed text "loadparam : $dst <- [$a, $b]"; opstr is
   // accepted but not referenced in this class. sop (default NOP) is stored as
   // scalarInst.
   // NOTE(review): the asm text is not PTX, so presumably this instruction is
   // always replaced by sop before printing -- confirm.
   1162 class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>:
   1163       NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b),
   1164                 "loadparam : $dst <- [$a, $b]",
   1165                 [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
   1166                 sop>;
   1167 
   // Vector-register form of a parameter store: matches
   // (StoreParam (i32 imm:$a), (i32 imm:$b), $val) for a regclass vector $val.
   // The asm string is the fixed text "storeparam : [$a, $b] <- $val"; opstr is
   // accepted but not referenced in this class. sop (default NOP) is stored as
   // scalarInst.
   1168 class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
   1169       : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
   1170                 "storeparam : [$a, $b] <- $val",
   1171                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>;
   1172 
   // Vector-register form of a return-value store: matches
   // (StoreRetval (i32 imm:$a), $val) for a regclass vector $val. The asm
   // string is the fixed text "storeretval : retval[$a] <- $val"; opstr is
   // accepted but not referenced in this class. sop (default NOP) is stored as
   // scalarInst.
   1173 class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
   1174   NVPTXInst sop=NOP>
   1175       : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a),
   1176                 "storeretval : retval[$a] <- $val",
   1177                 [(StoreRetval (i32 imm:$a), regclass:$val)], sop>;
   1178 
   // Vector parameter-load instructions, one per vector register class, all
   // tagged VecInstType=isVecLD.Value. Each names the LoadParamScalar* def of
   // the same element type and count as its scalar instruction.
   1179 let VecInstType=isVecLD.Value in {
   1180 def LoadParamV4I32  : LoadParamVecInst<V4I32Regs, ".v4.b32",
   1181   LoadParamScalar4I32>;
   1182 def LoadParamV4I16  : LoadParamVecInst<V4I16Regs, ".v4.b16",
   1183   LoadParamScalar4I16>;
   1184 def LoadParamV4I8   : LoadParamVecInst<V4I8Regs, ".v4.b8",
   1185   LoadParamScalar4I8>;
   1186 
   1187 def LoadParamV2I64  : LoadParamVecInst<V2I64Regs, ".v2.b64",
   1188   LoadParamScalar2I64>;
   1189 def LoadParamV2I32  : LoadParamVecInst<V2I32Regs, ".v2.b32",
   1190   LoadParamScalar2I32>;
   1191 def LoadParamV2I16  : LoadParamVecInst<V2I16Regs, ".v2.b16",
   1192   LoadParamScalar2I16>;
   1193 def LoadParamV2I8   : LoadParamVecInst<V2I8Regs, ".v2.b8",
   1194   LoadParamScalar2I8>;
   1195 
   1196 def LoadParamV4F32  : LoadParamVecInst<V4F32Regs, ".v4.f32",
   1197   LoadParamScalar4F32>;
   1198 def LoadParamV2F32  : LoadParamVecInst<V2F32Regs, ".v2.f32",
   1199   LoadParamScalar2F32>;
   1200 def LoadParamV2F64  : LoadParamVecInst<V2F64Regs, ".v2.f64",
   1201   LoadParamScalar2F64>;
   1202 }
   1203 
   // Vector parameter-store and return-value-store instructions, one per
   // vector register class, all tagged VecInstType=isVecST.Value. Each names
   // the StoreParamScalar* / StoreRetvalScalar* def of the same element type
   // and count as its scalar instruction.
   1204 let VecInstType=isVecST.Value in {
   1205 def StoreParamV4I32  : StoreParamVecInst<V4I32Regs, ".v4.b32",
   1206   StoreParamScalar4I32>;
   1207 def StoreParamV4I16  : StoreParamVecInst<V4I16Regs, ".v4.b16",
   1208   StoreParamScalar4I16>;
   1209 def StoreParamV4I8   : StoreParamVecInst<V4I8Regs, ".v4.b8",
   1210   StoreParamScalar4I8>;
   1211 
   1212 def StoreParamV2I64  : StoreParamVecInst<V2I64Regs, ".v2.b64",
   1213   StoreParamScalar2I64>;
   1214 def StoreParamV2I32  : StoreParamVecInst<V2I32Regs, ".v2.b32",
   1215   StoreParamScalar2I32>;
   1216 def StoreParamV2I16  : StoreParamVecInst<V2I16Regs, ".v2.b16",
   1217   StoreParamScalar2I16>;
   1218 def StoreParamV2I8   : StoreParamVecInst<V2I8Regs, ".v2.b8",
   1219   StoreParamScalar2I8>;
   1220 
   1221 def StoreParamV4F32  : StoreParamVecInst<V4F32Regs, ".v4.f32",
   1222   StoreParamScalar4F32>;
   1223 def StoreParamV2F32  : StoreParamVecInst<V2F32Regs, ".v2.f32",
   1224   StoreParamScalar2F32>;
   1225 def StoreParamV2F64  : StoreParamVecInst<V2F64Regs, ".v2.f64",
   1226   StoreParamScalar2F64>;
   1227 
   1228 def StoreRetvalV4I32  : StoreRetvalVecInst<V4I32Regs, ".v4.b32",
   1229   StoreRetvalScalar4I32>;
   1230 def StoreRetvalV4I16  : StoreRetvalVecInst<V4I16Regs, ".v4.b16",
   1231   StoreRetvalScalar4I16>;
   1232 def StoreRetvalV4I8   : StoreRetvalVecInst<V4I8Regs,  ".v4.b8",
   1233   StoreRetvalScalar4I8>;
   1234 
   1235 def StoreRetvalV2I64  : StoreRetvalVecInst<V2I64Regs, ".v2.b64",
   1236   StoreRetvalScalar2I64>;
   1237 def StoreRetvalV2I32  : StoreRetvalVecInst<V2I32Regs, ".v2.b32",
   1238   StoreRetvalScalar2I32>;
   1239 def StoreRetvalV2I16  : StoreRetvalVecInst<V2I16Regs, ".v2.b16",
   1240   StoreRetvalScalar2I16>;
   1241 def StoreRetvalV2I8   : StoreRetvalVecInst<V2I8Regs,  ".v2.b8",
   1242   StoreRetvalScalar2I8>;
   1243 
   1244 def StoreRetvalV4F32  : StoreRetvalVecInst<V4F32Regs, ".v4.f32",
   1245   StoreRetvalScalar4F32>;
   1246 def StoreRetvalV2F32  : StoreRetvalVecInst<V2F32Regs, ".v2.f32",
   1247   StoreRetvalScalar2F32>;
   1248 def StoreRetvalV2F64  : StoreRetvalVecInst<V2F64Regs, ".v2.f64",
   1249   StoreRetvalScalar2F64>;
   1250 
   1251 }
   1252 
   1253 
   1254 // Int vector to int scalar bit convert
        // Each pattern rewrites a bitconvert from an integer vector register to
        // an integer scalar of the same total width: every element is pulled out
        // with the matching V*Extract instruction (indices in ascending order)
        // and the elements are handed to the corresponding V*to* instruction.
        // NOTE(review): which element lands in the low bits is decided by the
        // V*to* instructions, which are defined outside this section.
   1255 // v4i8 -> i32
   1256 def : Pat<(i32 (bitconvert V4I8Regs:$s)),
   1257           (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
   1258                      (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>;
   1259 // v4i16 -> i64
   1260 def : Pat<(i64 (bitconvert V4I16Regs:$s)),
   1261           (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
   1262             (V4i16Extract V4I16Regs:$s,1),
   1263                      (V4i16Extract V4I16Regs:$s,2),
   1264                      (V4i16Extract V4I16Regs:$s,3))>;
   1265 // v2i8 -> i16
   1266 def : Pat<(i16 (bitconvert V2I8Regs:$s)),
   1267           (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>;
   1268 // v2i16 -> i32
   1269 def : Pat<(i32 (bitconvert V2I16Regs:$s)),
   1270           (V2I16toI32 (V2i16Extract V2I16Regs:$s,0),
   1271             (V2i16Extract V2I16Regs:$s,1))>;
   1272 // v2i32 -> i64
   1273 def : Pat<(i64 (bitconvert V2I32Regs:$s)),
   1274           (V2I32toI64 (V2i32Extract V2I32Regs:$s,0),
   1275             (V2i32Extract V2I32Regs:$s,1))>;
   1276 
   1277 // Int scalar to int vector bit convert
        // Each def below is a vector instruction (VecInstType = isVecDest.Value)
        // whose pattern matches a bitconvert from an integer scalar register to
        // an integer vector register. The last argument names the instruction
        // that NVPTXVecInst records as its scalar counterpart.
        // NOTE(review): the asm string is the literal "Error!", so these
        // instructions are presumably always replaced by the scalar counterpart
        // before assembly is printed -- confirm.
   1278 let VecInstType=isVecDest.Value in {
   1279 // i32 -> v4i8
   1280 def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
   1281                                 "Error!",
   1282                                 [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
   1283                                 I32toV4I8>;
   1284 // i64 -> v4i16
   1285 def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
   1286                                  "Error!",
   1287                                 [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
   1288                                  I64toV4I16>;
   1289 // i16 -> v2i8
   1290 def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
   1291                                 "Error!",
   1292                                [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
   1293                                 I16toV2I8>;
   1294 // i32 -> v2i16
   1295 def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
   1296                                  "Error!",
   1297                                 [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
   1298                                  I32toV2I16>;
   1299 // i64 -> v2i32
   1300 def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
   1301                                   "Error!",
   1302                                 [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
   1303                                   I64toV2I32>;
   1304 }
   1305 
   1306 // Int vector to int vector bit convert
        // Two strategies are used below:
        //  * When source and destination both fit one scalar register, the
        //    source elements are extracted and packed into that scalar with a
        //    V*to* instruction, and the scalar is then turned into the
        //    destination vector with a VecI*toV* instruction.
        //  * For v2i64 <-> v4i32 the result is assembled with Build_Vector*,
        //    converting one i64 element (or one pair of i32 elements) at a time.
   1307 // v4i8 -> v2i16
   1308 def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
   1309           (VecI32toV2I16
   1310           (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
   1311                     (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
   1312 // v4i16 -> v2i32
   1313 def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
   1314           (VecI64toV2I32
   1315        (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
   1316                 (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
   1317 // v2i16 -> v4i8
   1318 def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
   1319           (VecI32toV4I8
   1320     (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
   1321 // v2i32 -> v4i16
   1322 def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
   1323           (VecI64toV4I16
   1324     (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
   1325 // v2i64 -> v4i32
        // Each i64 element is split into a v2i32 (VecI64toV2I32), and both
        // halves of each split are extracted to form the four result elements.
   1326 def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
   1327           (Build_Vector4_i32
   1328             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
   1329             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
   1330             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
   1331             (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;
   1332 // v4i32 -> v2i64
        // Elements 0,1 are packed into the first i64 and elements 2,3 into the
        // second.
   1333 def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
   1334           (Build_Vector2_i64
   1335       (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)),
   1336     (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>;
   1337 
   1338 // Fp scalar to fp vector convert
        // Vector instruction (VecInstType = isVecDest.Value) whose pattern
        // matches a bitconvert from a Float64Regs scalar to a V2F32Regs vector.
        // F64toV2F32 is passed as the scalar counterpart recorded by
        // NVPTXVecInst.
        // NOTE(review): the asm string is the literal "Error!", so this
        // instruction is presumably always replaced before printing -- confirm.
   1339 // f64 -> v2f32
   1340 let VecInstType=isVecDest.Value in {
   1341 def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
   1342                                   "Error!",
   1343                               [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
   1344                                   F64toV2F32>;
   1345 }
   1346 
   1347 // Fp vector to fp scalar convert
        // Both f32 elements are extracted (index 0, then 1) and passed to
        // V2F32toF64 to form the f64 result.
   1348 // v2f32 -> f64
   1349 def : Pat<(f64 (bitconvert V2F32Regs:$s)),
   1350      (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>;
   1351 
   1352 // Fp scalar to int vector convert
        // The floating-point scalar is first reinterpreted as an integer scalar
        // of the same width (BITCONVERT_*_F2I); that integer is then turned into
        // the destination vector with the matching VecI*toV* instruction.
   1353 // f32 -> v4i8
   1354 def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
   1355           (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;
   1356 // f32 -> v2i16
   1357 def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
   1358           (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;
   1359 // f64 -> v4i16
   1360 def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
   1361           (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;
   1362 // f64 -> v2i32
   1363 def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
   1364           (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
   1365 
   1366 // Int vector to fp scalar convert
        // The vector elements are extracted and packed into an integer scalar
        // with the matching V*to* instruction; that integer is then
        // reinterpreted as a floating-point scalar of the same width
        // (BITCONVERT_*_I2F).
   1367 // v4i8 -> f32
   1368 def : Pat<(f32 (bitconvert V4I8Regs:$s)),
   1369           (BITCONVERT_32_I2F
   1370           (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1),
   1371                     (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>;
   1372 // v4i16 -> f64
   1373 def : Pat<(f64 (bitconvert V4I16Regs:$s)),
   1374           (BITCONVERT_64_I2F
   1375        (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1),
   1376                 (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>;
   1377 // v2i16 -> f32
   1378 def : Pat<(f32 (bitconvert V2I16Regs:$s)),
   1379           (BITCONVERT_32_I2F
   1380     (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>;
   1381 // v2i32 -> f64
   1382 def : Pat<(f64 (bitconvert V2I32Regs:$s)),
   1383           (BITCONVERT_64_I2F
   1384     (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>;
   1385 
   1386 // Int scalar to fp vector convert
        // The i64 is reinterpreted as f64 (BITCONVERT_64_I2F) and the f64 is
        // then converted to a v2f32 with VecF64toV2F32.
   1387 // i64 -> v2f32
   1388 def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
   1389           (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;
   1390 
   1391 // Fp vector to int scalar convert
        // Both f32 elements are extracted and packed into an f64 with
        // V2F32toF64; the f64 is then reinterpreted as i64 (BITCONVERT_64_F2I).
   1392 // v2f32 -> i64
   1393 def : Pat<(i64 (bitconvert V2F32Regs:$s)),
   1394           (BITCONVERT_64_F2I
   1395     (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>;
   1396 
   1397 // Int vector to fp vector convert
        // Most patterns build the result with Build_Vector*_f*, producing each
        // floating-point element by reinterpreting an integer of the same width
        // (BITCONVERT_*_I2F). Where element widths differ, the integer data is
        // first split (VecI64toV2I32 + V2i32Extract) or merged (V2I32toI64) to
        // the destination element width. The final pattern (v4i16 -> v2f32)
        // instead goes through a single i64/f64 scalar.
   1398 // v2i64 -> v4f32
        // Each i64 element is split into two i32 halves; each half becomes one
        // f32 element.
   1399 def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
   1400           (Build_Vector4_f32
   1401             (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
   1402               (V2i64Extract V2I64Regs:$s, 0)), 0)),
   1403             (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
   1404               (V2i64Extract V2I64Regs:$s, 0)), 1)),
   1405             (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
   1406               (V2i64Extract V2I64Regs:$s, 1)), 0)),
   1407             (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32
   1408               (V2i64Extract V2I64Regs:$s, 1)), 1)))>;
   1409 // v2i64 -> v2f64
   1410 def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
   1411     (Build_Vector2_f64
   1412             (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)),
   1413             (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>;
   1414 // v2i32 -> v2f32
   1415 def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
   1416     (Build_Vector2_f32
   1417             (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)),
   1418             (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>;
   1419 // v4i32 -> v2f64
        // Elements 0,1 are merged into the first i64 and elements 2,3 into the
        // second before each is reinterpreted as f64.
   1420 def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
   1421           (Build_Vector2_f64
   1422            (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0),
   1423              (V4i32Extract V4I32Regs:$s,1))),
   1424            (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2),
   1425              (V4i32Extract V4I32Regs:$s,3))))>;
   1426 // v4i32 -> v4f32
   1427 def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
   1428     (Build_Vector4_f32
   1429             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)),
   1430             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)),
   1431             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)),
   1432             (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>;
   1433 // v4i16 -> v2f32
        // The four i16 elements are packed into one i64, reinterpreted as f64,
        // and the f64 is converted to v2f32 with VecF64toV2F32.
   1434 def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
   1435           (VecF64toV2F32 (BITCONVERT_64_I2F
   1436           (V4I16toI64 (V4i16Extract V4I16Regs:$s,0),
   1437             (V4i16Extract V4I16Regs:$s,1),
   1438                       (V4i16Extract V4I16Regs:$s,2),
   1439                       (V4i16Extract V4I16Regs:$s,3))))>;
   1440 
   1441 // Fp vector to int vector convert
        // Mirror of the int-vector-to-fp-vector patterns: most results are
        // built with Build_Vector*_i*, producing each integer element by
        // reinterpreting a float of the same width (BITCONVERT_*_F2I). Where
        // element widths differ, the floating-point data is first merged
        // (V2F32toF64) or split (VecF64toV2F32 + V2f32Extract) to the
        // destination element width. The final pattern (v4i16 <- v2f32) instead
        // goes through a single f64/i64 scalar.
   1442 // v2i64 <- v4f32
        // Elements 0,1 are merged into the first f64 and elements 2,3 into the
        // second before each is reinterpreted as i64.
   1443 def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
   1444           (Build_Vector2_i64
   1445            (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0),
   1446              (V4f32Extract V4F32Regs:$s,1))),
   1447            (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2),
   1448              (V4f32Extract V4F32Regs:$s,3))))>;
   1449 // v2i64 <- v2f64
   1450 def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
   1451     (Build_Vector2_i64
   1452             (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)),
   1453             (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>;
   1454 // v2i32 <- v2f32
   1455 def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
   1456     (Build_Vector2_i32
   1457             (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)),
   1458             (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>;
   1459 // v4i32 <- v2f64
        // Each f64 element is split into two f32 halves; each half becomes one
        // i32 element.
   1460 def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
   1461           (Build_Vector4_i32
   1462             (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
   1463               (V2f64Extract V2F64Regs:$s, 0)), 0)),
   1464             (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
   1465               (V2f64Extract V2F64Regs:$s, 0)), 1)),
   1466             (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
   1467               (V2f64Extract V2F64Regs:$s, 1)), 0)),
   1468             (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32
   1469               (V2f64Extract V2F64Regs:$s, 1)), 1)))>;
   1470 // v4i32 <- v4f32
   1471 def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
   1472           (Build_Vector4_i32
   1473             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)),
   1474             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)),
   1475             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)),
   1476             (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
   1477 // v4i16 <- v2f32
        // The two f32 elements are packed into one f64, reinterpreted as i64,
        // and the i64 is converted to v4i16 with VecI64toV4I16.
   1478 def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
   1479           (VecI64toV4I16 (BITCONVERT_64_F2I
   1480           (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
   1481             (V2f32Extract V2F32Regs:$s,1))))>;
   1482