Home | History | Annotate | Download | only in NVPTX
      1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // immFloat0: PatLeaf over fpimm. The C++ predicate converts the node's
        // APFloat with convertToFloat() and accepts it when the result compares
        // equal to 0.0f. In this file it is only used as an operand of the
        // int_nvvm_fmin_f / int_nvvm_fmax_f saturate patterns.
        // NOTE(review): convertToFloat() presumably expects a single-precision
        // APFloat -- confirm this leaf is never matched against f64 immediates.
     10 def immFloat0 : PatLeaf<(fpimm), [{
     11     float f = (float)N->getValueAPF().convertToFloat();
     12     return (f==0.0f);
     13 }]>;
     14 // immFloat1: PatLeaf over fpimm that accepts the immediate when its
        // convertToFloat() value compares equal to 1.0f.
     15 def immFloat1 : PatLeaf<(fpimm), [{
     16     float f = (float)N->getValueAPF().convertToFloat();
     17     return (f==1.0f);
     18 }]>;
     19 // immDouble0: PatLeaf over fpimm that accepts the immediate when its
        // convertToDouble() value compares equal to 0.0.
     20 def immDouble0 : PatLeaf<(fpimm), [{
     21     double d = (double)N->getValueAPF().convertToDouble();
     22     return (d==0.0);
     23 }]>;
     24 // immDouble1: PatLeaf over fpimm that accepts the immediate when its
        // convertToDouble() value compares equal to 1.0.
     25 def immDouble1 : PatLeaf<(fpimm), [{
     26     double d = (double)N->getValueAPF().convertToDouble();
     27     return (d==1.0);
     28 }]>;
     29 
     30 
     31 
     32 //-----------------------------------
     33 // Synchronization and shuffle functions
     34 //-----------------------------------
     35 let isConvergent = 1 in {
     36 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
        // No operands and no results: int_nvvm_barrier0 is printed as bar.sync
        // with the literal operand 0.
     37                   "bar.sync \t0;",
     38       [(int_nvvm_barrier0)]>;
     39 def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
        // One i32 register operand, printed as the single bar.sync operand;
        // selected for int_nvvm_barrier_n.
     40                   "bar.sync \t$src1;",
     41       [(int_nvvm_barrier_n Int32Regs:$src1)]>;
     42 def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
        // Two i32 register operands, printed in order as the bar.sync operands;
        // selected for int_nvvm_barrier.
        // NOTE(review): presumably barrier number and thread count per the PTX
        // bar.sync syntax -- confirm against the intrinsic definition.
     43                   "bar.sync \t$src1, $src2;",
     44       [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
     45 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
        // Emits a multi-line PTX sequence: declare predicate %p1, set it to
        // ($pred != 0), then issue bar.red.popc.u32 with the literal operand 0
        // and %p1, writing the result to $dst. Selected for
        // int_nvvm_barrier0_popc.
     46   !strconcat("{{ \n\t",
     47              ".reg .pred \t%p1; \n\t",
     48              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
     49              "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
     50              "}}"),
     51       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
     52 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
        // Emits a multi-line PTX sequence: %p1 = ($pred != 0); bar.red.and.pred
        // with the literal operand 0 and %p1 produces %p2; selp.u32 then
        // writes 1 to $dst when %p2 is set and 0 otherwise. Selected for
        // int_nvvm_barrier0_and.
     53   !strconcat("{{ \n\t",
     54              ".reg .pred \t%p1; \n\t",
     55              ".reg .pred \t%p2; \n\t",
     56              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
     57              "bar.red.and.pred \t%p2, 0, %p1; \n\t",
     58              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
     59              "}}"),
     60       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
     61 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
        // Same sequence as INT_BARRIER0_AND but using bar.red.or.pred:
        // %p1 = ($pred != 0); the reduction result lands in %p2; selp.u32
        // writes 1 or 0 to $dst. Selected for int_nvvm_barrier0_or.
     62   !strconcat("{{ \n\t",
     63              ".reg .pred \t%p1; \n\t",
     64              ".reg .pred \t%p2; \n\t",
     65              "setp.ne.u32 \t%p1, $pred, 0; \n\t",
     66              "bar.red.or.pred \t%p2, 0, %p1; \n\t",
     67              "selp.u32 \t$dst, 1, 0, %p2; \n\t",
     68              "}}"),
     69       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
     70 // bar.sync with a single immediate operand; selected for int_nvvm_bar_sync.
     71 def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
     72                              [(int_nvvm_bar_sync imm:$i)]>;
     73 // bar.warp.sync: the _I form takes its operand as an immediate, the _R form
        // as an i32 register. Both are gated on the hasPTX60 and hasSM30
        // predicates and selected for int_nvvm_bar_warp_sync.
     74 def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
     75                              [(int_nvvm_bar_warp_sync imm:$i)]>,
     76         Requires<[hasPTX60, hasSM30]>;
     77 def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
     78                              [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
     79         Requires<[hasPTX60, hasSM30]>;
     80 // barrier.sync with one operand: the _I form takes it as an immediate, the
        // _R form as an i32 register. Both are gated on hasPTX60 and hasSM30 and
        // selected for int_nvvm_barrier_sync.
     81 def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
     82                                    [(int_nvvm_barrier_sync imm:$i)]>,
     83         Requires<[hasPTX60, hasSM30]>;
     84 def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
     85                                    [(int_nvvm_barrier_sync Int32Regs:$i)]>,
     86         Requires<[hasPTX60, hasSM30]>;
     87 // barrier.sync with two operands ($id, $cnt), selected for
        // int_nvvm_barrier_sync_cnt. The two-letter suffix gives the operand kind
        // of $id and $cnt in that order: R = Int32Regs, I = i32imm. All four
        // variants print the same asm string and are gated on hasPTX60 and
        // hasSM30.
     88 def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
     89                  "barrier.sync \t$id, $cnt;",
     90                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
     91         Requires<[hasPTX60, hasSM30]>;
     92 def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
     93                  "barrier.sync \t$id, $cnt;",
     94                  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
     95         Requires<[hasPTX60, hasSM30]>;
     96 def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
     97                  "barrier.sync \t$id, $cnt;",
     98                  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
     99         Requires<[hasPTX60, hasSM30]>;
    100 def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
    101                  "barrier.sync \t$id, $cnt;",
    102                  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
    103         Requires<[hasPTX60, hasSM30]>;
    104 
    105 
    106 // shfl.{up,down,bfly,idx}.b32
        //
        // Parameters: regclass is the register class of both $dst and $src, mode
        // is spliced into the mnemonic ("shfl." mode ".b32"), and IntOp is the
        // intrinsic being selected. Each instantiation defines four records --
        // reg, imm1, imm2 and imm3 -- that print the same asm string and differ
        // only in whether $offset and $mask are registers or immediates.
    107 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
    108   // The last two parameters to shfl can be regs or imms.  ptxas is smart
    109   // enough to inline constant registers, so strictly speaking we don't need to
    110   // handle immediates here.  But it's easy enough, and it makes our ptx more
    111   // readable.
        //
        // reg: $offset and $mask are both registers.
    112   def reg : NVPTXInst<
    113       (outs regclass:$dst),
    114       (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
    115       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
    116       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;
    117 
        // imm1: $offset is an immediate, $mask is a register.
    118   def imm1 : NVPTXInst<
    119       (outs regclass:$dst),
    120       (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
    121       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
    122       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;
    123 
        // imm2: $offset is a register, $mask is an immediate.
    124   def imm2 : NVPTXInst<
    125       (outs regclass:$dst),
    126       (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
    127       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
    128       [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;
    129 
        // imm3: $offset and $mask are both immediates.
    130   def imm3 : NVPTXInst<
    131       (outs regclass:$dst),
    132       (ins regclass:$src, i32imm:$offset, i32imm:$mask),
    133       !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
    134       [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
    135 }
    136 // SHFL instantiations: one Int32Regs and one Float32Regs variant for each of
        // the down, up, bfly and idx modes, each bound to its matching
        // int_nvvm_shfl_<mode>_{i32,f32} intrinsic.
    137 defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
    138 defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
    139 defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
    140 defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
    141 defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
    142 defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
    143 defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
    144 defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;
    145 // shfl.sync.{up,down,bfly,idx}.b32
        //
        // Parameters: regclass is the register class of both $dst and $src, mode
        // is spliced into the mnemonic ("shfl.sync." mode ".b32"), and IntOp is
        // the intrinsic being selected. Eight records are defined; the
        // three-letter suffix gives, in order, the operand kind of $threadmask,
        // $offset and $mask (r = Int32Regs, i = i32imm). The intrinsic takes
        // $threadmask as its first argument, but the asm string prints it last.
    146 multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
    147   // Threadmask and the last two parameters to shfl.sync can be regs or imms.
    148   // ptxas is smart enough to inline constant registers, so strictly speaking we
    149   // don't need to handle immediates here.  But it's easy enough, and it makes
    150   // our ptx more readable.
    151   def rrr : NVPTXInst<
    152       (outs regclass:$dst),
    153       (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
    154       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    155       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
    156                             Int32Regs:$offset, Int32Regs:$mask))]>;
    157 
    158   def rri : NVPTXInst<
    159       (outs regclass:$dst),
    160       (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
    161       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    162       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
    163                             Int32Regs:$offset, imm:$mask))]>;
    164 
    165   def rir : NVPTXInst<
    166       (outs regclass:$dst),
    167       (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
    168       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    169       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
    170                             imm:$offset, Int32Regs:$mask))]>;
    171 
    172   def rii : NVPTXInst<
    173       (outs regclass:$dst),
    174       (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
    175       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    176       [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
    177                             imm:$offset, imm:$mask))]>;
    178 
    179   def irr : NVPTXInst<
    180       (outs regclass:$dst),
    181       (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
    182       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    183       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
    184                             Int32Regs:$offset, Int32Regs:$mask))]>;
    185 
    186   def iri : NVPTXInst<
    187       (outs regclass:$dst),
    188       (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask),
    189       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    190       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
    191                             Int32Regs:$offset, imm:$mask))]>;
    192 
    193   def iir : NVPTXInst<
    194       (outs regclass:$dst),
    195       (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask),
    196       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    197       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
    198                             imm:$offset, Int32Regs:$mask))]>;
    199 
    200   def iii : NVPTXInst<
    201       (outs regclass:$dst),
    202       (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask),
    203       !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"),
    204       [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
    205                             imm:$offset, imm:$mask))]>;
    206 }
    207 
    208 // On sm_70 these don't have to be convergent, so we may eventually want to
    209 // implement a non-convergent variant of these intrinsics.
        //
        // SHFL_SYNC instantiations: one Int32Regs and one Float32Regs variant for
        // each of the down, up, bfly and idx modes.
    210 defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
    211 defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
    212 defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
    213 defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
    214 defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
    215 defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
    216 defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
    217 defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
    218 
    219 
    220 // vote.{all,any,uni,ballot}
        //
        // Defines a single anonymous record per instantiation: the result is in
        // regclass, the only input is an Int1Regs predicate, and mode is appended
        // to "vote." to form the mnemonic. Gated on hasPTX60 and hasSM30.
    221 multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
    222   def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
    223               "vote." # mode # " \t$dest, $pred;",
    224               [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
    225         Requires<[hasPTX60, hasSM30]>;
    226 }
    227 // VOTE instantiations: all/any/uni produce an Int1Regs result (".pred"
        // suffix); ballot produces an Int32Regs result (".b32" suffix).
    228 defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
    229 defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
    230 defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
    231 defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
    232 
    233 // vote.sync.{all,any,uni,ballot}
        //
        // Defines two records per instantiation: i takes $mask as an i32
        // immediate, r takes it as an Int32Regs register. The intrinsic takes
        // $mask first, while the asm string prints it after $pred. Both records
        // are gated on hasPTX60 and hasSM30.
    234 multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
    235   def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
    236               "vote.sync." # mode # " \t$dest, $pred, $mask;",
    237               [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
    238           Requires<[hasPTX60, hasSM30]>;
    239   def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
    240               "vote.sync." # mode #" \t$dest, $pred, $mask;",
    241               [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
    242           Requires<[hasPTX60, hasSM30]>;
    243 }
    244 // VOTE_SYNC instantiations: all/any/uni produce an Int1Regs result; ballot
        // produces an Int32Regs result.
    245 defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
    246 defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
    247 defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
    248 defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
    249 // match.any.sync.{b32,b64}
        //
        // Parameters: regclass is the register class of $dest and of a register
        // $value, ptxtype is appended to the mnemonic, IntOp is the intrinsic
        // being selected, and ImmOp is the operand type used when $value is an
        // immediate. Four records are defined; the two-letter suffix follows the
        // printed operand order ($value, then $mask) with i = immediate and
        // r = register. All are gated on hasPTX60 and hasSM70.
    250 multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
    251                           Operand ImmOp> {
    252   def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
    253               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
    254               [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
    255            Requires<[hasPTX60, hasSM70]>;
    256   def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
    257               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
    258               [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
    259            Requires<[hasPTX60, hasSM70]>;
    260   def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
    261               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
    262               [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
    263            Requires<[hasPTX60, hasSM70]>;
    264   def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
    265               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
    266               [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
    267            Requires<[hasPTX60, hasSM70]>;
    268 }
    269 // MATCH_ANY_SYNC instantiations for 32-bit (Int32Regs / i32imm) and 64-bit
        // (Int64Regs / i64imm) values.
    270 defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
    271                                         i32imm>;
    272 defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
    273                                         i64imm>;
    274 // match.all.sync.{b32,b64}, two-result form
        //
        // Like MATCH_ANY_SYNC, but every record has two outputs: $dest in regclass
        // and an Int1Regs $pred, printed as "$dest|$pred". The two-letter suffix
        // follows the printed operand order ($value, then $mask) with
        // i = immediate and r = register. All are gated on hasPTX60 and hasSM70.
    275 multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
    276                           Operand ImmOp> {
    277   def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
    278                      (ins i32imm:$mask, ImmOp:$value),
    279               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
    280               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
    281            Requires<[hasPTX60, hasSM70]>;
    282   def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
    283                      (ins Int32Regs:$mask, ImmOp:$value),
    284               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
    285               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
    286            Requires<[hasPTX60, hasSM70]>;
    287   def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
    288                      (ins i32imm:$mask, regclass:$value),
    289               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
    290               [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
    291            Requires<[hasPTX60, hasSM70]>;
    292   def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
    293                      (ins Int32Regs:$mask, regclass:$value),
    294               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
    295               [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
    296            Requires<[hasPTX60, hasSM70]>;
    297 }
    298 defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
    299                                          i32imm>;  // 32-bit value: Int32Regs / i32imm
    300 defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
    301                                          i64imm>;  // 64-bit value: Int64Regs / i64imm
    302 
    303 } // isConvergent = 1
    304 
    305 //-----------------------------------
    306 // Explicit Memory Fence Functions
    307 //-----------------------------------
    308 class MEMBAR<string StrOp, Intrinsic IntOP> :
        // Operand-less, result-less instruction: StrOp is used verbatim as the
        // asm string and the selection pattern is the bare intrinsic IntOP.
    309               NVPTXInst<(outs), (ins),
    310             StrOp, [(IntOP)]>;
    311 // MEMBAR instantiations for the membar.cta, membar.gl and membar.sys forms.
    312 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
    313 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
    314 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
    315 
    316 
    317 //-----------------------------------
    318 // Math Functions
    319 //-----------------------------------
    320 
    321 // Map min(1.0, max(0.0, x)) to sat(x)
    322 // Note that the reversed nesting, max(0.0, min(x, 1.0)), cannot be mapped to
    323 // sat(x): when x is NaN, max(0.0, min(x, 1.0)) evaluates to 1.0 while sat(x)
    324 // evaluates to 0.
    325 // Same story for fmax, fmin.
    326 // f32: fmin(1.0, fmax(0.0, a)) in all four operand orders (constant first or
        // second in each of fmin and fmax) is rewritten to CVT_f32_f32 with the
        // CvtSAT mode.
    327 def : Pat<(int_nvvm_fmin_f immFloat1,
    328             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
    329           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    330 def : Pat<(int_nvvm_fmin_f immFloat1,
    331             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
    332           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    333 def : Pat<(int_nvvm_fmin_f
    334             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
    335           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    336 def : Pat<(int_nvvm_fmin_f
    337             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
    338           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    339 // f64: the same four operand orders of fmin(1.0, fmax(0.0, a)) are rewritten
        // to CVT_f64_f64 with the CvtSAT mode.
    340 def : Pat<(int_nvvm_fmin_d immDouble1,
    341             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
    342           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    343 def : Pat<(int_nvvm_fmin_d immDouble1,
    344             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
    345           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    346 def : Pat<(int_nvvm_fmin_d
    347             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
    348           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    349 def : Pat<(int_nvvm_fmin_d
    350             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
    351           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    352 
    353 
    354 // We need a full string for OpcStr here because we need to deal with cases
    355 // like INT_PTX_RECIP.
        //
        // One-source instruction: $src0 in src_regclass, result $dst in
        // target_regclass, selected for (IntOP $src0). OpcStr is the complete asm
        // string, including the operand list.
    356 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
    357   NVPTXRegClass src_regclass, Intrinsic IntOP>
    358             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
    359             OpcStr,
    360         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
    361 
    362 // We need a full string for OpcStr here because we need to deal with cases
    363 // like INT_PTX_NATIVE_POWR_F.
        //
        // Two-source instruction: $src0 and $src1 each have their own register
        // class, the result $dst is in t_regclass, and the record is selected for
        // (IntOP $src0, $src1). OpcStr is the complete asm string.
    364 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
    365   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
    366             : NVPTXInst<(outs t_regclass:$dst),
    367               (ins s0_regclass:$src0, s1_regclass:$src1),
    368             OpcStr,
    369         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
    370 // Three-source instruction: $src0, $src1 and $src2 each have their own
        // register class, the result $dst is in t_regclass, and the record is
        // selected for (IntOP $src0, $src1, $src2). OpcStr is the complete asm
        // string.
    371 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
    372   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
    373   NVPTXRegClass s2_regclass, Intrinsic IntOP>
    374             : NVPTXInst<(outs t_regclass:$dst),
    375               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
    376             OpcStr,
    377         [(set t_regclass:$dst,
    378           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
    379 
    380 //
    381 // MISC
    382 //
    383 // prmt.b32 with three i32 register sources and an i32 result; selected for
        // int_nvvm_prmt.
    384 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
    385   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
    386 
    387 //
    388 // Min Max
    389 //
    390 // f32 min, plain and .ftz forms.
    391 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
    392   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
    393 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
    394   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
    395 // f32 max, plain and .ftz forms.
    396 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
    397   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
    398 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
    399   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
    400 // f64 min and max (no .ftz forms are defined here).
    401 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
    402   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
    403 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
    404   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
    405 
    406 
    407 //
    408 // Multiplication
    409 //
    410 // mul.hi on 32-bit registers, signed (.s32) and unsigned (.u32).
    411 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
    412   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
    413 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
    414   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
    415 // mul.hi on 64-bit registers, signed (.s64) and unsigned (.u64).
    416 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
    417   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
    418 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
    419   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
    420 // f32 mul with an explicit rounding modifier (rn, rz, rm, rp), each in a
        // .ftz and a plain form.
    421 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    422   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
    423 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
    424   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
    425 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    426   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
    427 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
    428   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
    429 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    430   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
    431 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
    432   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
    433 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    434   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
    435 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
    436   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
    437 // f64 mul with an explicit rounding modifier (rn, rz, rm, rp).
    438 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
    439   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
    440 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
    441   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
    442 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
    443   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
    444 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
    445   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
    446 // mul24.lo on 32-bit registers, signed (.s32) and unsigned (.u32).
    447 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
    448   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
    449 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
    450   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
    451 
    452 //
    453 // Div
    454 //
    455 // f32 div.approx, .ftz and plain forms.
    456 def INT_NVVM_DIV_APPROX_FTZ_F
    457   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
    458     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
    459 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
    460   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
    461 // f32 div with an explicit rounding modifier (rn, rz, rm, rp), each in a
        // .ftz and a plain form.
    462 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
    463   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
    464 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
    465   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
    466 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
    467   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
    468 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
    469   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
    470 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
    471   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
    472 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
    473   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
    474 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
    475   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
    476 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
    477   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
    478 // f64 div with an explicit rounding modifier (rn, rz, rm, rp).
    479 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
    480   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
    481 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
    482   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
    483 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
    484   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
    485 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
    486   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
    487 
    488 //
    489 // Sad
    490 //
    491 // sad on three 32-bit register sources, signed (.s32) and unsigned (.u32).
    492 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
    493   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
    494 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
    495   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
    496 
    497 //
    498 // Floor  Ceil
    499 //
    500 // floor: no dedicated instruction record; the intrinsics are rewritten to
        // same-type CVT instructions with the CvtRMI mode (CvtRMI_FTZ for the ftz
        // variant).
    501 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
    502           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    503 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
    504           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
    505 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
    506           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
    507 // ceil: rewritten to same-type CVT instructions with the CvtRPI mode
        // (CvtRPI_FTZ for the ftz variant).
    508 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
    509           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    510 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
    511           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
    512 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
    513           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
    514 
    515 //
    516 // Abs
    517 //
    518 // abs on f32 (.ftz and plain forms) and on f64.
    519 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
    520   Float32Regs, int_nvvm_fabs_ftz_f>;
    521 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
    522   Float32Regs, int_nvvm_fabs_f>;
    523 
    524 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
    525   Float64Regs, int_nvvm_fabs_d>;
    526 
    527 //
    528 // Round
    529 //
    530 // round: rewritten to same-type CVT instructions with the CvtRNI mode
        // (CvtRNI_FTZ for the ftz variant).
    531 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
    532           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    533 def : Pat<(int_nvvm_round_f Float32Regs:$a),
    534           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
    535 def : Pat<(int_nvvm_round_d Float64Regs:$a),
    536           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
    537 
    538 //
    539 // Trunc
    540 //
    541 // trunc: rewritten to same-type CVT instructions with the CvtRZI mode
        // (CvtRZI_FTZ for the ftz variant).
    542 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
    543           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    544 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
    545           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
    546 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
    547           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
    548 
    549 //
    550 // Saturate
    551 //
    552 // saturate: rewritten to same-type CVT instructions with the CvtSAT mode
        // (CvtSAT_FTZ for the ftz variant).
    553 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
    554           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
    555 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
    556           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    557 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
    558           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    559 
    560 //
    561 // Exp2  Log2
    562 //
    563 // ex2.approx on f32 (.ftz and plain forms) and on f64.
        // NOTE(review): the PTX ISA appears to document ex2.approx only for f32
        // (and f16 types) -- confirm that the .f64 string below assembles.
    564 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
    565   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
    566 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
    567   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
    568 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
    569   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
    570 // lg2.approx on f32 (.ftz and plain forms) and on f64.
        // NOTE(review): the PTX ISA appears to document lg2.approx only for f32
        // -- confirm that the .f64 string below assembles.
    571 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
    572   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
    573 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
    574   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
    575 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
    576   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
    577 
    578 //
    579 // Sin  Cos
    580 //
    581 // sin.approx on f32, .ftz and plain forms.
    582 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
    583   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
    584 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
    585   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
    586 // cos.approx on f32, .ftz and plain forms.
    587 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
    588   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
    589 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
    590   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
    591 
    592 //
    593 // Fma
    594 //
    595 // f32 fma with an explicit rounding modifier (rn, rz, rm, rp), each in a
        // .ftz and a plain form. Operands are printed as $src0, $src1, $src2.
    596 def INT_NVVM_FMA_RN_FTZ_F
    597   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    598     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
    599 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
    600   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
    601 def INT_NVVM_FMA_RZ_FTZ_F
    602   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    603     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
    604 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
    605   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
    606 def INT_NVVM_FMA_RM_FTZ_F
    607   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    608     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
    609 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
    610   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
    611 def INT_NVVM_FMA_RP_FTZ_F
    612   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    613     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
    614 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
    615   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
    616 
    617 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
    618   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
    619 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
    620   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
    621 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
    622   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
    623 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
    624   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
    625 
    626 //
    627 // Rcp
        //
        // Reciprocal (rcp) defs. f32: the four rounding modifiers .rn/.rz/.rm/.rp,
        // each in a .ftz and a plain spelling. f64: the same four modifiers without
        // .ftz, plus one rcp.approx.ftz.f64 def. All go through F_MATH_1 and bind
        // to the int_nvvm_rcp_* intrinsic with the matching suffix.
    628 //
    629
    630 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
    631   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
    632 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
    633   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
    634 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
    635   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
    636 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
    637   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
    638 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
    639   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
    640 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
    641   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
    642 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
    643   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
    644 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
    645   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
    646
    647 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
    648   Float64Regs, int_nvvm_rcp_rn_d>;
    649 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
    650   Float64Regs, int_nvvm_rcp_rz_d>;
    651 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
    652   Float64Regs, int_nvvm_rcp_rm_d>;
    653 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
    654   Float64Regs, int_nvvm_rcp_rp_d>;
    655
    656 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
    657   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
    658 
    659 //
    660 // Sqrt
        //
        // Square-root defs. f32: .rn/.rz/.rm/.rp and .approx, each in a .ftz and a
        // plain spelling. f64: .rn/.rz/.rm/.rp only. All go through F_MATH_1 and
        // bind to the int_nvvm_sqrt_* intrinsic with the matching suffix. The
        // generic int_nvvm_sqrt_f intrinsic is handled by the four patterns at the
        // end of the section.
    661 //
    662
    663 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
    664   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
    665 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
    666   Float32Regs, int_nvvm_sqrt_rn_f>;
    667 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
    668   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
    669 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
    670   Float32Regs, int_nvvm_sqrt_rz_f>;
    671 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
    672   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
    673 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
    674   Float32Regs, int_nvvm_sqrt_rm_f>;
    675 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
    676   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
    677 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
    678   Float32Regs, int_nvvm_sqrt_rp_f>;
    679 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
    680   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
    681 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
    682   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
    683
    684 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
    685   Float64Regs, int_nvvm_sqrt_rn_d>;
    686 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
    687   Float64Regs, int_nvvm_sqrt_rz_d>;
    688 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
    689   Float64Regs, int_nvvm_sqrt_rm_d>;
    690 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
    691   Float64Regs, int_nvvm_sqrt_rp_d>;
    692
    693 // nvvm_sqrt intrinsic
        // int_nvvm_sqrt_f maps to one of four f32 sqrt instructions depending on
        // the predicates attached to each pattern:
        //   doF32FTZ + do_SQRTF32_RN -> INT_NVVM_SQRT_RN_FTZ_F
        //   do_SQRTF32_RN            -> INT_NVVM_SQRT_RN_F
        //   doF32FTZ                 -> INT_NVVM_SQRT_APPROX_FTZ_F
        //   (no predicate)           -> INT_NVVM_SQRT_APPROX_F
        // NOTE(review): this relies on the more constrained patterns being tried
        // before the unpredicated one -- confirm how the selector orders them.
    694 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    695           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
    696 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    697           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
    698 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    699           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
    700 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    701           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
    702 
    703 //
    704 // Rsqrt
        //
        // Approximate reciprocal square root (rsqrt.approx): f32 in .ftz and plain
        // spellings, and one f64 def. Each binds to the int_nvvm_rsqrt_approx_*
        // intrinsic with the matching suffix through F_MATH_1.
    705 //
    706
    707 def INT_NVVM_RSQRT_APPROX_FTZ_F
    708   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    709     int_nvvm_rsqrt_approx_ftz_f>;
    710 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
    711   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
    712 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
    713   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
    714 
    715 //
    716 // Add
        //
        // Floating-point add with an explicit rounding modifier (.rn/.rz/.rm/.rp).
        // f32 defs come in .ftz and plain pairs; f64 defs have no .ftz spelling
        // here. Each def instantiates F_MATH_2 (declared outside this section)
        // with the asm string, three register classes and the int_nvvm_add_*
        // intrinsic with the matching suffix.
    717 //
    718
    719 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
    720   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
    721 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
    722   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
    723 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
    724   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
    725 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
    726   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
    727 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
    728   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
    729 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
    730   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
    731 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
    732   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
    733 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
    734   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
    735
    736 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
    737   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
    738 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
    739   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
    740 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
    741   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
    742 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
    743   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
    744 
    745 //
    746 // Convert
        //
        // The conversion intrinsics are not given instructions of their own: each
        // one is rewritten by an anonymous Pat to an existing CVT_<dst>_<src>
        // instruction plus a conversion-mode operand (Cvt*), both declared outside
        // this section. The intrinsic's rounding suffix (rn/rz/rm/rp, optionally
        // _ftz) picks the mode operand of the same name.
    747 //
    748
        // double -> float: CVT_f32_f64 with CvtRN/RZ/RM/RP, with and without _FTZ.
    749 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
    750           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
    751 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
    752           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
    753 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
    754           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
    755 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
    756           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
    757 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
    758           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
    759 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
    760           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
    761 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
    762           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
    763 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
    764           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
    765
        // double -> 32-bit integer. d2i uses CVT_s32_f64 and d2ui uses CVT_u32_f64;
        // the float-to-integer patterns take the Cvt*I mode operands (CvtRNI, ...).
    766 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
    767           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
    768 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
    769           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
    770 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
    771           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
    772 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
    773           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
    774
    775 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
    776           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
    777 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
    778           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
    779 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
    780           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
    781 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
    782           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
    783
        // 32-bit integer -> double. i2d uses CVT_f64_s32 and ui2d uses CVT_f64_u32;
        // integer-to-float patterns take the plain CvtRN/RZ/RM/RP mode operands.
    784 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
    785           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
    786 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
    787           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
    788 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
    789           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
    790 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
    791           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
    792
    793 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
    794           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
    795 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
    796           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
    797 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
    798           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
    799 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
    800           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
    801
        // float -> 32-bit integer. f2i uses CVT_s32_f32 and f2ui uses CVT_u32_f32.
        // Each rounding mode has an _ftz intrinsic mapped to the Cvt*I_FTZ operand
        // and a plain intrinsic mapped to the Cvt*I operand.
    802 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
    803           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    804 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
    805           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
    806 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
    807           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    808 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
    809           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
    810 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
    811           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    812 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
    813           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
    814 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
    815           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    816 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
    817           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
    818
    819 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
    820           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    821 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
    822           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
    823 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
    824           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    825 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
    826           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
    827 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
    828           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    829 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
    830           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
    831 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
    832           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    833 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
    834           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
    835
        // 32-bit integer -> float. i2f uses CVT_f32_s32 and ui2f uses CVT_f32_u32,
        // with the plain CvtRN/RZ/RM/RP mode operands.
    836 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
    837           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
    838 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
    839           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
    840 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
    841           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
    842 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
    843           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
    844
    845 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
    846           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
    847 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
    848           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
    849 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
    850           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
    851 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
    852           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
    853
        // Moves between one 64-bit double register and a pair of 32-bit integer
        // registers, written as mov.b64 with a brace-enclosed register pair.
        // INT_NVVM_LOHI_I2D builds the double from {$src0, $src1}.
    854 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
    855   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
    856
        // INT_NVVM_D2I_LO unpacks $src0 into {$dst, %temp}: the first element is
        // kept as the result and the second goes to a scratch .b32 register that
        // is declared inside the emitted brace-delimited block.
    857 def INT_NVVM_D2I_LO : F_MATH_1<
    858   !strconcat("{{\n\t",
    859              ".reg .b32 %temp; \n\t",
    860              "mov.b64 \t{$dst, %temp}, $src0;\n\t",
    861              "}}"),
    862   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
        // INT_NVVM_D2I_HI is the mirror image: {%temp, $dst}, so the second element
        // is the result and the first is discarded into the scratch register.
    863 def INT_NVVM_D2I_HI : F_MATH_1<
    864   !strconcat("{{\n\t",
    865              ".reg .b32 %temp; \n\t",
    866              "mov.b64 \t{%temp, $dst}, $src0;\n\t",
    867              "}}"),
    868   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
    869
        // float -> 64-bit integer. f2ll uses CVT_s64_f32 and f2ull uses CVT_u64_f32.
        // _ftz intrinsics map to the Cvt*I_FTZ operands, plain ones to Cvt*I.
    870 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
    871           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    872 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
    873           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
    874 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
    875           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    876 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
    877           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
    878 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
    879           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    880 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
    881           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
    882 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
    883           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    884 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
    885           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
    886
    887 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
    888           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    889 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
    890           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
    891 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
    892           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    893 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
    894           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
    895 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
    896           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    897 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
    898           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
    899 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
    900           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    901 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
    902           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
    903
        // double -> 64-bit integer. d2ll uses CVT_s64_f64 and d2ull uses
        // CVT_u64_f64, with the Cvt*I mode operands; no _ftz forms appear here.
    904 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
    905           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
    906 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
    907           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
    908 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
    909           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
    910 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
    911           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
    912
    913 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
    914           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
    915 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
    916           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
    917 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
    918           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
    919 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
    920           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
    921
        // 64-bit integer -> float/double. The ll* intrinsics use the CVT_*_s64
        // instructions and the ull* intrinsics use CVT_*_u64; all take the plain
        // CvtRN/RZ/RM/RP mode operands.
    922 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
    923           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
    924 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
    925           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
    926 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
    927           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
    928 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
    929           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
    930
    931 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
    932           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
    933 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
    934           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
    935 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
    936           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
    937 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
    938           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
    939
    940 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
    941           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
    942 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
    943           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
    944 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
    945           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
    946 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
    947           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
    948
    949 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
    950           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
    951 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
    952           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
    953 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
    954           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
    955 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
    956           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
    957 
    958
        // float -> half, round-to-nearest only (_ftz and plain). The CVT_f16_f32
        // result is wrapped in BITCONVERT_16_F2I before being used as the
        // intrinsic's value.
        // NOTE(review): presumably because the intrinsic returns the half as an
        // integer bit pattern -- confirm against the intrinsic's declared type.
    959 def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
    960           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
    961 def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
    962           (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
    963 
    964 //
    965 // Bitcast
        //
        // Same-width moves between integer and floating-point register classes:
        // mov.b32 for Int32Regs <-> Float32Regs and mov.b64 for Int64Regs <->
        // Float64Regs. Each def binds to the int_nvvm_bitcast_* intrinsic with the
        // matching suffix through F_MATH_1.
    966 //
    967
    968 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
    969   Float32Regs, int_nvvm_bitcast_f2i>;
    970 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
    971   Int32Regs, int_nvvm_bitcast_i2f>;
    972
    973 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
    974   Int64Regs, int_nvvm_bitcast_ll2d>;
    975 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
    976   Float64Regs, int_nvvm_bitcast_d2ll>;
    977 
    978 //
    979 // FNS
        //
        // fns.b32 instruction for the int_nvvm_fns intrinsic.
        //
        // INT_FNS_MBO is the shared base: it always produces an Int32Regs $dst,
        // prints "fns.b32 $dst, $mask, $base, $offset;", and is gated on
        // hasPTX60 and hasSM30.
        //   ins      - input operand list; must name $mask, $base and $offset.
        //   Operands - the int_nvvm_fns dag whose result is assigned to $dst.
        //
        // The eight defs enumerate every register/immediate combination of the
        // three inputs. The three-letter suffix reads mask, base, offset in that
        // order, with 'r' = Int32Regs and 'i' = i32imm (matched with imm).
    980 //
    981
    982 class INT_FNS_MBO<dag ins, dag Operands>
    983   : NVPTXInst<(outs Int32Regs:$dst), ins,
    984                "fns.b32 \t$dst, $mask, $base, $offset;",
    985                [(set Int32Regs:$dst, Operands )]>,
    986     Requires<[hasPTX60, hasSM30]>;
    987
    988 def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
    989                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
    990 def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
    991                      (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
    992 def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
    993                      (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
    994 def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
    995                      (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
    996 def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
    997                      (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
    998 def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
    999                      (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
   1000 def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
   1001                      (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
   1002 def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
   1003                      (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
   1004 
   1005 //-----------------------------------
   1006 // Atomic Functions
        //
        // ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK wrap a
        // pattern fragment (ops, frag) in a PatFrag whose predicate code returns
        // ChkMemSDNodeAddressSpace(N, <space>) for ADDRESS_SPACE_GLOBAL, _SHARED
        // and _GENERIC respectively.
        // NOTE(review): ChkMemSDNodeAddressSpace is defined outside this file
        // section; presumably it is true only when node N accesses the given
        // address space -- confirm.
   1007 //-----------------------------------
   1008
   1009 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
   1010  : PatFrag<ops, frag, [{
   1011    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
   1012 }]>;
   1013 class ATOMIC_SHARED_CHK <dag ops, dag frag>
   1014  : PatFrag<ops, frag, [{
   1015    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
   1016 }]>;
   1017 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
   1018  : PatFrag<ops, frag, [{
   1019    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
   1020 }]>;
   1021
        // F_ATOMIC_2_imp: two-operand atomic (address + one value) for a single
        // pointer register class. Produces two instruction defs:
        //   reg - value operand $b comes from a register of class regclass
        //   imm - value operand $b is an immediate (operand IMMType, matched as IMM)
        // Both print "atom" # SpaceStr # OpcStr # TypeStr # " $dst, [$addr], $b;"
        // and are gated on the predicate list Pred.
        //   ptrclass - register class of the $addr operand
        //   regclass - register class of $dst (and of $b in the reg form)
        //   SpaceStr - address-space suffix, e.g. ".global", or "" for none
        //   TypeStr  - type suffix, e.g. ".u32"
        //   OpcStr   - operation suffix, e.g. ".add"
        //   IntOp    - pattern fragment to select from
   1022 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
   1023   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
   1024   Operand IMMType, SDNode IMM, list<Predicate> Pred> {
   1025   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
   1026     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
   1027     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
   1028   Requires<Pred>;
   1029   def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
   1030     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
   1031     [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
   1032   Requires<Pred>;
   1033 }
   1034 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
   1035   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
   1036   list<Predicate> Pred = []> {
          // Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
          // Int64Regs as the pointer register class, forwarding every other
          // argument unchanged. Pred defaults to the empty list (no predicates).
   1037   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1038     IntOp, IMMType, IMM, Pred>;
   1039   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1040     IntOp, IMMType, IMM, Pred>;
   1041 }
   1042 
   1043 // has 2 operands, neg the second one
        // Emits a brace-delimited PTX block that declares a scratch register
        // "temp" of type .s<TypeStr>, negates $b into it with neg.s<TypeStr>, and
        // then issues "atom" # SpaceStr # OpcStr # ".u" # TypeStr on [$addr] with
        // temp as the value. TypeStr is therefore the bare width ("32"/"64"); the
        // ".s"/".u" prefixes are added here.
        // Only a register form (reg) is defined; IMMType is accepted but not used
        // in this body.
   1044 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
   1045   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
   1046   Operand IMMType, list<Predicate> Pred> {
   1047   def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
   1048     !strconcat(
   1049       "{{ \n\t",
   1050       ".reg \t.s", TypeStr, " temp; \n\t",
   1051       "neg.s", TypeStr, " \ttemp, $b; \n\t",
   1052       "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
   1053       "}}"),
   1054     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
   1055   Requires<Pred>;
   1056 }
   1057 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
   1058   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
   1059   list<Predicate> Pred = []> {
         // Instantiates F_ATOMIC_2_NEG_imp for Int32Regs and Int64Regs pointer
         // register classes, forwarding the remaining arguments unchanged. Pred
         // defaults to the empty list.
   1060  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1061    IntOp, IMMType, Pred> ;
   1062  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1063    IntOp, IMMType, Pred> ;
   1064 }
   1065 
   1066 // has 3 operands
        // F_ATOMIC_3_imp: atomic taking an address and two value operands ($b, $c)
        // for a single pointer register class. All four defs print
        // "atom" # SpaceStr # OpcStr # TypeStr # " $dst, [$addr], $b, $c;" and are
        // gated on Pred; they differ only in which value operands are immediates:
        //   reg  - $b register,  $c register
        //   imm1 - $b immediate, $c register
        //   imm2 - $b register,  $c immediate
        //   imm3 - $b immediate, $c immediate
        // Immediate operands are declared with IMMType and always matched with
        // the `imm` node (there is no SDNode parameter here).
   1067 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
   1068   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
   1069   Operand IMMType, list<Predicate> Pred> {
   1070   def reg : NVPTXInst<(outs regclass:$dst),
   1071     (ins ptrclass:$addr, regclass:$b, regclass:$c),
   1072     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
   1073     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
   1074   Requires<Pred>;
   1075
   1076   def imm1 : NVPTXInst<(outs regclass:$dst),
   1077     (ins ptrclass:$addr, IMMType:$b, regclass:$c),
   1078     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
   1079     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
   1080   Requires<Pred>;
   1081
   1082   def imm2 : NVPTXInst<(outs regclass:$dst),
   1083     (ins ptrclass:$addr, regclass:$b, IMMType:$c),
   1084     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
   1085     [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
   1086   Requires<Pred>;
   1087
   1088   def imm3 : NVPTXInst<(outs regclass:$dst),
   1089     (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
   1090     !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
   1091     [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
   1092   Requires<Pred>;
   1093 }
   1094 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
   1095   string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
          // Instantiates F_ATOMIC_3_imp for Int32Regs and Int64Regs pointer
          // register classes, forwarding the remaining arguments unchanged. Pred
          // defaults to the empty list.
   1096   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1097     IntOp, IMMType, Pred>;
   1098   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   1099     IntOp, IMMType, Pred>;
   1100 }
   1101 
   1102 // atom_add
        //
        // Pattern fragments first: each atomic_load_add_* fragment wraps a base
        // node (atomic_load_add_32/_64, or the int_nvvm_atomic_load_add_f32/_f64
        // intrinsic) in one of the ATOMIC_*_CHK address-space checks. Suffixes:
        // _g = global, _s = shared, _gen = generic.
   1103
   1104 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1105   (atomic_load_add_32 node:$a, node:$b)>;
   1106 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1107   (atomic_load_add_32 node:$a, node:$b)>;
   1108 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1109   (atomic_load_add_32 node:$a, node:$b)>;
   1110 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1111   (atomic_load_add_64 node:$a, node:$b)>;
   1112 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1113   (atomic_load_add_64 node:$a, node:$b)>;
   1114 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1115   (atomic_load_add_64 node:$a, node:$b)>;
   1116 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1117   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
   1118 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1119   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
   1120 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1121   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
   1122 def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1123   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
   1124 def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1125   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
   1126 def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1127   (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
   1128
        // Instructions: atom[.space].add.<type> through F_ATOMIC_2. Integer forms
        // use the .u32/.u64 type suffix with i32imm/i64imm + imm for immediates.
        // The *_GEN_* defs pass "" as the space suffix; the *_GEN_*_USE_G defs
        // match the same generic fragment but print ".global".
        // NOTE(review): no predicate separates *_GEN_* from *_GEN_*_USE_G here --
        // confirm how the intended one is chosen.
   1129 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
   1130   atomic_load_add_32_g, i32imm, imm>;
   1131 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
   1132   atomic_load_add_32_s, i32imm, imm>;
   1133 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
   1134   atomic_load_add_32_gen, i32imm, imm>;
   1135 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1136   ".add", atomic_load_add_32_gen, i32imm, imm>;
   1137
   1138 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
   1139   atomic_load_add_64_g, i64imm, imm>;
   1140 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
   1141   atomic_load_add_64_s, i64imm, imm>;
   1142 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
   1143   atomic_load_add_64_gen, i64imm, imm>;
   1144 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   1145   ".add", atomic_load_add_64_gen, i64imm, imm>;
   1146
        // Floating-point adds use f32imm/f64imm operands matched with fpimm.
   1147 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
   1148   atomic_load_add_f32_g, f32imm, fpimm>;
   1149 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
   1150   atomic_load_add_f32_s, f32imm, fpimm>;
   1151 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
   1152   atomic_load_add_f32_gen, f32imm, fpimm>;
   1153
        // The f64 forms are additionally gated on the hasAtomAddF64 predicate.
   1154 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
   1155   atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
   1156 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
   1157   atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
   1158 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
   1159   atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
   1160 
   1161 // atom_sub
        //
        // Fragments wrap atomic_load_sub_32/_64 in the global (_g), shared (_s)
        // and generic (_gen) address-space checks.
   1162
   1163 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1164   (atomic_load_sub_32 node:$a, node:$b)>;
   1165 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1166   (atomic_load_sub_32 node:$a, node:$b)>;
   1167 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1168   (atomic_load_sub_32 node:$a, node:$b)>;
   1169 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1170   (atomic_load_sub_64 node:$a, node:$b)>;
   1171 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1172   (atomic_load_sub_64 node:$a, node:$b)>;
   1173 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1174   (atomic_load_sub_64 node:$a, node:$b)>;
   1175
        // Subtraction is emitted through F_ATOMIC_2_NEG with the ".add" opcode:
        // the value is negated into a scratch register and then added atomically.
        // TypeStr is the bare width ("32"/"64") because F_ATOMIC_2_NEG_imp adds
        // the ".s"/".u" prefixes itself. The *_USE_G defs match the generic
        // fragment but print ".global".
   1176 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
   1177   atomic_load_sub_32_g, i32imm>;
   1178 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
   1179   atomic_load_sub_64_g, i64imm>;
   1180 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
   1181   atomic_load_sub_32_gen, i32imm>;
   1182 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
   1183   ".add", atomic_load_sub_32_gen, i32imm>;
   1184 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
   1185   atomic_load_sub_32_s, i32imm>;
   1186 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
   1187   atomic_load_sub_64_s, i64imm>;
   1188 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
   1189   atomic_load_sub_64_gen, i64imm>;
   1190 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
   1191   ".add", atomic_load_sub_64_gen, i64imm>;
   1192 
   1193 // atom_swap
   1194 
        // Pattern fragments: atomic_swap_{32,64}, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1195 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1196   (atomic_swap_32 node:$a, node:$b)>;
   1197 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1198   (atomic_swap_32 node:$a, node:$b)>;
   1199 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1200   (atomic_swap_32 node:$a, node:$b)>;
   1201 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1202   (atomic_swap_64 node:$a, node:$b)>;
   1203 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1204   (atomic_swap_64 node:$a, node:$b)>;
   1205 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1206   (atomic_swap_64 node:$a, node:$b)>;
   1207 
        // Instruction definitions: opcode string ".exch" with the untyped
        // ".b32"/".b64" type strings.  The *_GEN_*_USE_G variants reuse the
        // generic-space fragment but pass ".global" as the space string.
   1208 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
   1209   atomic_swap_32_g, i32imm, imm>;
   1210 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
   1211   atomic_swap_32_s, i32imm, imm>;
   1212 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
   1213   atomic_swap_32_gen, i32imm, imm>;
   1214 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1215   ".exch", atomic_swap_32_gen, i32imm, imm>;
   1216 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
   1217   atomic_swap_64_g, i64imm, imm>;
   1218 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
   1219   atomic_swap_64_s, i64imm, imm>;
   1220 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
   1221   atomic_swap_64_gen, i64imm, imm>;
   1222 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1223   ".exch", atomic_swap_64_gen, i64imm, imm>;
   1224 
   1225 // atom_max
   1226 
        // Pattern fragments for the signed (atomic_load_max_*) and unsigned
        // (atomic_load_umax_*) maximum, 32- and 64-bit, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1227 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
   1228   , (atomic_load_max_32 node:$a, node:$b)>;
   1229 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1230   (atomic_load_max_32 node:$a, node:$b)>;
   1231 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1232   (atomic_load_max_32 node:$a, node:$b)>;
   1233 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
   1234   , (atomic_load_max_64 node:$a, node:$b)>;
   1235 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1236   (atomic_load_max_64 node:$a, node:$b)>;
   1237 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1238   (atomic_load_max_64 node:$a, node:$b)>;
   1239 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1240   (atomic_load_umax_32 node:$a, node:$b)>;
   1241 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1242   (atomic_load_umax_32 node:$a, node:$b)>;
   1243 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1244   (atomic_load_umax_32 node:$a, node:$b)>;
   1245 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1246   (atomic_load_umax_64 node:$a, node:$b)>;
   1247 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1248   (atomic_load_umax_64 node:$a, node:$b)>;
   1249 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1250   (atomic_load_umax_64 node:$a, node:$b)>;
   1251 
        // Instruction definitions.  All use the ".max" opcode string; signedness
        // is carried by the type string: ".s32"/".s64" for the MAX variants and
        // ".u32"/".u64" for the UMAX variants.  The *_GEN_*_USE_G variants reuse
        // the generic-space fragment but pass ".global" as the space string.
   1252 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
   1253   ".max", atomic_load_max_32_g, i32imm, imm>;
   1254 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
   1255   ".max", atomic_load_max_32_s, i32imm, imm>;
   1256 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
   1257   atomic_load_max_32_gen, i32imm, imm>;
   1258 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1259   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
   1260 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
   1261   ".max", atomic_load_max_64_g, i64imm, imm>;
   1262 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
   1263   ".max", atomic_load_max_64_s, i64imm, imm>;
   1264 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
   1265   atomic_load_max_64_gen, i64imm, imm>;
   1266 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1267   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
   1268 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1269   ".max", atomic_load_umax_32_g, i32imm, imm>;
   1270 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
   1271   ".max", atomic_load_umax_32_s, i32imm, imm>;
   1272 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
   1273   atomic_load_umax_32_gen, i32imm, imm>;
   1274 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1275   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
   1276 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   1277   ".max", atomic_load_umax_64_g, i64imm, imm>;
   1278 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
   1279   ".max", atomic_load_umax_64_s, i64imm, imm>;
   1280 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
   1281   atomic_load_umax_64_gen, i64imm, imm>;
   1282 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1283   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
   1284 
   1285 // atom_min
   1286 
        // Pattern fragments for the signed (atomic_load_min_*) and unsigned
        // (atomic_load_umin_*) minimum, 32- and 64-bit, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1287 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1288   (atomic_load_min_32 node:$a, node:$b)>;
   1289 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1290   (atomic_load_min_32 node:$a, node:$b)>;
   1291 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1292   (atomic_load_min_32 node:$a, node:$b)>;
   1293 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1294   (atomic_load_min_64 node:$a, node:$b)>;
   1295 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1296   (atomic_load_min_64 node:$a, node:$b)>;
   1297 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1298   (atomic_load_min_64 node:$a, node:$b)>;
   1299 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1300   (atomic_load_umin_32 node:$a, node:$b)>;
   1301 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1302   (atomic_load_umin_32 node:$a, node:$b)>;
   1303 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1304   (atomic_load_umin_32 node:$a, node:$b)>;
   1305 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1306   (atomic_load_umin_64 node:$a, node:$b)>;
   1307 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1308   (atomic_load_umin_64 node:$a, node:$b)>;
   1309 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1310   (atomic_load_umin_64 node:$a, node:$b)>;
   1311 
        // Instruction definitions.  All use the ".min" opcode string; signedness
        // is carried by the type string: ".s32"/".s64" for the MIN variants and
        // ".u32"/".u64" for the UMIN variants.  The *_GEN_*_USE_G variants reuse
        // the generic-space fragment but pass ".global" as the space string.
   1312 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
   1313   ".min", atomic_load_min_32_g, i32imm, imm>;
   1314 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
   1315   ".min", atomic_load_min_32_s, i32imm, imm>;
   1316 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
   1317   atomic_load_min_32_gen, i32imm, imm>;
   1318 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1319   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
   1320 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
   1321   ".min", atomic_load_min_64_g, i64imm, imm>;
   1322 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
   1323   ".min", atomic_load_min_64_s, i64imm, imm>;
   1324 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
   1325   atomic_load_min_64_gen, i64imm, imm>;
   1326 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1327   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
   1328 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1329   ".min", atomic_load_umin_32_g, i32imm, imm>;
   1330 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
   1331   ".min", atomic_load_umin_32_s, i32imm, imm>;
   1332 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
   1333   atomic_load_umin_32_gen, i32imm, imm>;
   1334 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1335   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
   1336 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   1337   ".min", atomic_load_umin_64_g, i64imm, imm>;
   1338 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
   1339   ".min", atomic_load_umin_64_s, i64imm, imm>;
   1340 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
   1341   atomic_load_umin_64_gen, i64imm, imm>;
   1342 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1343   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
   1344 
   1345 // atom_inc  atom_dec
   1346 
        // Pattern fragments.  Unlike the other atomics in this file, these wrap
        // the intrinsics int_nvvm_atomic_load_{inc,dec}_32 rather than an
        // atomic_load_* node, and only 32-bit forms are defined.
   1347 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1348   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1349 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1350   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1351 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1352   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1353 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1354   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1355 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1356   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1357 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1358   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1359 
        // Instruction definitions: opcode strings ".inc"/".dec", always with the
        // ".u32" type string.  The *_GEN_32_USE_G variants reuse the generic-space
        // fragment but pass ".global" as the space string.
   1360 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
   1361   atomic_load_inc_32_g, i32imm, imm>;
   1362 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
   1363   atomic_load_inc_32_s, i32imm, imm>;
   1364 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
   1365   atomic_load_inc_32_gen, i32imm, imm>;
   1366 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1367   ".inc", atomic_load_inc_32_gen, i32imm, imm>;
   1368 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
   1369   atomic_load_dec_32_g, i32imm, imm>;
   1370 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
   1371   atomic_load_dec_32_s, i32imm, imm>;
   1372 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
   1373   atomic_load_dec_32_gen, i32imm, imm>;
   1374 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1375   ".dec", atomic_load_dec_32_gen, i32imm, imm>;
   1376 
   1377 // atom_and
   1378 
        // Pattern fragments: atomic_load_and_{32,64}, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1379 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1380   (atomic_load_and_32 node:$a, node:$b)>;
   1381 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1382   (atomic_load_and_32 node:$a, node:$b)>;
   1383 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1384   (atomic_load_and_32 node:$a, node:$b)>;
   1385 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1386   (atomic_load_and_64 node:$a, node:$b)>;
   1387 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1388   (atomic_load_and_64 node:$a, node:$b)>;
   1389 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1390   (atomic_load_and_64 node:$a, node:$b)>;
   1391 
        // Instruction definitions: opcode string ".and" with the untyped
        // ".b32"/".b64" type strings.  The *_GEN_*_USE_G variants reuse the
        // generic-space fragment but pass ".global" as the space string.
   1392 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
   1393   atomic_load_and_32_g, i32imm, imm>;
   1394 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
   1395   atomic_load_and_32_s, i32imm, imm>;
   1396 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
   1397   atomic_load_and_32_gen, i32imm, imm>;
   1398 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1399   ".and", atomic_load_and_32_gen, i32imm, imm>;
   1400 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
   1401   atomic_load_and_64_g, i64imm, imm>;
   1402 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
   1403   atomic_load_and_64_s, i64imm, imm>;
   1404 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
   1405   atomic_load_and_64_gen, i64imm, imm>;
   1406 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1407   ".and", atomic_load_and_64_gen, i64imm, imm>;
   1408 
   1409 // atom_or
   1410 
        // Pattern fragments: atomic_load_or_{32,64}, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1411 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1412   (atomic_load_or_32 node:$a, node:$b)>;
   1413 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1414   (atomic_load_or_32 node:$a, node:$b)>;
   1415 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1416   (atomic_load_or_32 node:$a, node:$b)>;
   1417 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1418   (atomic_load_or_64 node:$a, node:$b)>;
   1419 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1420   (atomic_load_or_64 node:$a, node:$b)>;
   1421 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1422   (atomic_load_or_64 node:$a, node:$b)>;
   1423 
        // Instruction definitions: opcode string ".or" with the untyped
        // ".b32"/".b64" type strings.  The *_GEN_*_USE_G variants reuse the
        // generic-space fragment but pass ".global" as the space string.
   1424 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
   1425   atomic_load_or_32_g, i32imm, imm>;
   1426 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
   1427   atomic_load_or_32_gen, i32imm, imm>;
   1428 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1429   ".or", atomic_load_or_32_gen, i32imm, imm>;
   1430 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
   1431   atomic_load_or_32_s, i32imm, imm>;
   1432 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
   1433   atomic_load_or_64_g, i64imm, imm>;
   1434 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
   1435   atomic_load_or_64_gen, i64imm, imm>;
   1436 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1437   ".or", atomic_load_or_64_gen, i64imm, imm>;
   1438 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
   1439   atomic_load_or_64_s, i64imm, imm>;
   1440 
   1441 // atom_xor
   1442 
        // Pattern fragments: atomic_load_xor_{32,64}, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
   1443 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1444   (atomic_load_xor_32 node:$a, node:$b)>;
   1445 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1446   (atomic_load_xor_32 node:$a, node:$b)>;
   1447 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1448   (atomic_load_xor_32 node:$a, node:$b)>;
   1449 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1450   (atomic_load_xor_64 node:$a, node:$b)>;
   1451 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1452   (atomic_load_xor_64 node:$a, node:$b)>;
   1453 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1454   (atomic_load_xor_64 node:$a, node:$b)>;
   1455 
        // Instruction definitions: opcode string ".xor" with the untyped
        // ".b32"/".b64" type strings.  The *_GEN_*_USE_G variants reuse the
        // generic-space fragment but pass ".global" as the space string.
   1456 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
   1457   atomic_load_xor_32_g, i32imm, imm>;
   1458 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
   1459   atomic_load_xor_32_s, i32imm, imm>;
   1460 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
   1461   atomic_load_xor_32_gen, i32imm, imm>;
   1462 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1463   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
   1464 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
   1465   atomic_load_xor_64_g, i64imm, imm>;
   1466 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
   1467   atomic_load_xor_64_s, i64imm, imm>;
   1468 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
   1469   atomic_load_xor_64_gen, i64imm, imm>;
   1470 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1471   ".xor", atomic_load_xor_64_gen, i64imm, imm>;
   1472 
   1473 // atom_cas
   1474 
        // Pattern fragments: atomic_cmp_swap_{32,64}, each wrapped once per
        // ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK class (defined outside this section).
        // These take three operands ($a, $b, $c) rather than two.
   1475 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
   1476   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1477 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
   1478   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1479 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
   1480   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1481 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
   1482   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1483 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
   1484   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1485 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
   1486   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1487 
        // Instruction definitions via F_ATOMIC_3: opcode string ".cas" with the
        // untyped ".b32"/".b64" type strings.  Only the immediate operand type is
        // passed (no separate imm node argument, unlike the F_ATOMIC_2 users).
        // The *_GEN_*_USE_G variants reuse the generic-space fragment but pass
        // ".global" as the space string.
   1488 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
   1489   atomic_cmp_swap_32_g, i32imm>;
   1490 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
   1491   atomic_cmp_swap_32_s, i32imm>;
   1492 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
   1493   atomic_cmp_swap_32_gen, i32imm>;
   1494 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
   1495   ".cas", atomic_cmp_swap_32_gen, i32imm>;
   1496 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
   1497   atomic_cmp_swap_64_g, i64imm>;
   1498 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
   1499   atomic_cmp_swap_64_s, i64imm>;
   1500 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
   1501   atomic_cmp_swap_64_gen, i64imm>;
   1502 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
   1503   ".cas", atomic_cmp_swap_64_gen, i64imm>;
   1504 
   1505 // Support for scoped atomic operations.  Matches
   1506 // int_nvvm_atomic_{op}_{space}_{type}_{scope}
   1507 // and converts it into the appropriate instruction.
   1508 // NOTE: not all possible combinations are implemented
   1509 //  'space' is limited to generic as it's the only one needed to support CUDA.
   1510 //  'scope' = 'gpu' is default and is handled by regular atomic instructions.
        //
        // ATOM23_impl is the common instruction record shared by the two- and
        // three-operand scoped atomics:
        //   AsmStr   - assembly text of the instruction.
        //   regclass - register class of the single output, $result.
        //   Preds    - subtarget predicates attached through Requires<>.
        //   ins      - input operand dag.
        //   Operands - dag whose value is assigned to $result in the selection
        //              pattern (set regclass:$result, Operands).
   1511 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
   1512                   dag ins, dag Operands>
   1513       : NVPTXInst<(outs regclass:$result), ins,
   1514                   AsmStr,
   1515                   [(set regclass:$result, Operands)]>,
   1516         Requires<Preds>;
   1517 
   1518 // Define instruction variants for all addressing modes.
        //
        // Emits four anonymous ATOM23_impl records for a two-operand atomic that
        // matches intrinsic Intr: the address $src is taken from Int32Regs or
        // Int64Regs, and the operand $b is either a register of `regclass` or an
        // immediate (operand type ImmType, matched by node Imm cast to ImmTy).
   1519 multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
   1520                        NVPTXRegClass regclass, Operand ImmType,
   1521                        SDNode Imm, ValueType ImmTy,
   1522                        list<Predicate> Preds> {
          // Register-operand forms carry AddedComplexity = 1; the immediate forms
          // below are left at the default complexity.
   1523   let AddedComplexity = 1 in {
   1524     def : ATOM23_impl<AsmStr, regclass, Preds,
   1525                       (ins Int32Regs:$src, regclass:$b),
   1526                       (Intr Int32Regs:$src, regclass:$b)>;
   1527     def : ATOM23_impl<AsmStr, regclass, Preds,
   1528                       (ins Int64Regs:$src, regclass:$b),
   1529                       (Intr Int64Regs:$src, regclass:$b)>;
   1530   }
   1531   // tablegen can't infer argument types from Intrinsic (though it can
   1532   // from Instruction) so we have to enforce specific type on
   1533   // immediates via explicit cast to ImmTy.
   1534   def : ATOM23_impl<AsmStr, regclass, Preds,
   1535                     (ins Int32Regs:$src, ImmType:$b),
   1536                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
   1537   def : ATOM23_impl<AsmStr, regclass, Preds,
   1538                     (ins Int64Regs:$src, ImmType:$b),
   1539                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
   1540 }
   1541 
   1542 multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
   1543                        NVPTXRegClass regclass, Operand ImmType,
   1544                        SDNode Imm, ValueType ImmTy,
   1545                        list<Predicate> Preds> {
   1546   // Variants for register/immediate permutations of $b and $c
          //
          // Three-operand counterpart of ATOM2P_impl: eight anonymous ATOM23_impl
          // records, i.e. {reg,imm} x {reg,imm} for ($b, $c), each with a 32-bit
          // and a 64-bit address register for $src.  AddedComplexity is 2 for
          // reg/reg, 1 when exactly one operand is an immediate, and the default
          // for imm/imm.  Immediates are cast to ImmTy explicitly.
   1547   let AddedComplexity = 2 in {
   1548     def : ATOM23_impl<AsmStr, regclass, Preds,
   1549                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
   1550                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
   1551     def : ATOM23_impl<AsmStr, regclass, Preds,
   1552                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
   1553                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
   1554   }
   1555   let AddedComplexity = 1 in {
            // $b immediate, $c register.
   1556     def : ATOM23_impl<AsmStr, regclass, Preds,
   1557                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
   1558                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
   1559     def : ATOM23_impl<AsmStr, regclass, Preds,
   1560                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
   1561                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
            // $b register, $c immediate.
   1562     def : ATOM23_impl<AsmStr, regclass, Preds,
   1563                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
   1564                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
   1565     def : ATOM23_impl<AsmStr, regclass, Preds,
   1566                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
   1567                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
   1568   }
          // Both $b and $c immediate.
   1569   def : ATOM23_impl<AsmStr, regclass, Preds,
   1570                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
   1571                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
   1572   def : ATOM23_impl<AsmStr, regclass, Preds,
   1573                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
   1574                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
   1575 }
   1576 
   1577 // Constructs intrinsic name and instruction asm strings.
        //
        // The asm string is assembled as
        //   "atom" [.SpaceStr] [.ScopeStr] .OpStr .TypeStr " \t$result, [$src], $b;"
        // where the space component is dropped when SpaceStr is "gen" and the
        // scope component is dropped when ScopeStr is "gpu".  The intrinsic is
        // looked up by name as
        //   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
        // where the scope suffix is dropped when ScopeStr is "".
        // NOTE(review): the two scope tests differ ("gpu" vs ""); the only scope
        // strings passed by ATOM2S_impl are "cta" and "sys", so neither special
        // case is exercised by the instantiations in this file -- confirm before
        // adding a new scope.
   1578 multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
   1579                        string ScopeStr, string SpaceStr,
   1580                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
   1581                        ValueType ImmTy, list<Predicate> Preds> {
   1582   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
   1583                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
   1584                             # "." # OpStr # "." # TypeStr
   1585                             # " \t$result, [$src], $b;",
   1586                      !cast<Intrinsic>(
   1587                             "int_nvvm_atomic_" # OpStr
   1588                             # "_" # SpaceStr # "_" # IntTypeStr
   1589                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
   1590                      regclass, ImmType, Imm, ImmTy, Preds>;
   1591 }
   1592 multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
   1593                        string ScopeStr, string SpaceStr,
   1594                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
   1595                        ValueType ImmTy, list<Predicate> Preds> {
          // Three-operand counterpart of ATOM2N_impl: builds the same asm prefix
          // ("atom" [.SpaceStr unless "gen"] [.ScopeStr unless "gpu"] .OpStr
          // .TypeStr) with the operand list " \t$result, [$src], $b, $c;", looks up
          // int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>] by name,
          // and forwards everything to ATOM3P_impl.
          // NOTE(review): as in ATOM2N_impl, the asm string tests ScopeStr against
          // "gpu" while the intrinsic name tests it against "" -- confirm before
          // adding a new scope.
   1596   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
   1597                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
   1598                             # "." # OpStr # "." # TypeStr
   1599                             # " \t$result, [$src], $b, $c;",
   1600                      !cast<Intrinsic>(
   1601                             "int_nvvm_atomic_" # OpStr
   1602                             # "_" # SpaceStr # "_" # IntTypeStr
   1603                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
   1604                      regclass, ImmType, Imm, ImmTy, Preds>;
   1605 }
   1606 
   1607 // Constructs variants for different address spaces.
   1608 // For now we only need variants for generic space pointers.
        // Accordingly the only instantiation passes "gen" as SpaceStr to
        // ATOM2N_impl; all other arguments are forwarded unchanged.
   1609 multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
   1610                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
   1611                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
   1612    defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
   1613                             regclass, ImmType, Imm, ImmTy, Preds>;
   1614 }
   1615 multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
   1616                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
   1617                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
           // Three-operand counterpart of ATOM2A_impl: only the "gen" address
           // space is instantiated; all other arguments are forwarded unchanged
           // to ATOM3N_impl.
   1618    defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
   1619                             regclass, ImmType, Imm, ImmTy, Preds>;
   1620 }
   1621 
   1622 // Constructs variants for different scopes of atomic op.
        // Instantiates ATOM2A_impl once with scope "cta" and once with scope "sys";
        // in both cases hasAtomScope is appended to the caller's Preds list.
   1623 multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
   1624                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
   1625                        ValueType ImmTy, list<Predicate> Preds> {
   1626    // .gpu scope is default and is currently covered by existing
   1627    // atomics w/o explicitly specified scope.
   1628    defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
   1629                            regclass, ImmType, Imm, ImmTy,
   1630                            !listconcat(Preds,[hasAtomScope])>;
   1631    defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
   1632                            regclass, ImmType, Imm, ImmTy,
   1633                            !listconcat(Preds,[hasAtomScope])>;
   1634 }
   1635 multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
   1636            NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
   1637            list<Predicate> Preds> {
           // Three-operand counterpart of ATOM2S_impl: instantiates ATOM3A_impl
           // for the "cta" and "sys" scopes, appending hasAtomScope to Preds.
   1638    // No need to define ".gpu"-scoped atomics.  They do the same thing
   1639    // as the regular, non-scoped atomics defined elsewhere.
   1640    defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
   1641                            regclass, ImmType, Imm, ImmTy,
   1642                            !listconcat(Preds,[hasAtomScope])>;
   1643    defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
   1644                            regclass, ImmType, Imm, ImmTy,
   1645                            !listconcat(Preds,[hasAtomScope])>;
   1646 }
   1647 
   1648 // atom.add
        // Scoped add for the s32, u32, u64, f32 and f64 type strings (no s64
        // variant is defined here).  Integer types use intrinsic type letter "i"
        // with `imm`; floating-point types use "f" with `fpimm`.  Only the f64
        // variant carries an extra predicate, hasAtomAddF64.
   1649 multiclass ATOM2_add_impl<string OpStr> {
   1650    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
   1651    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
   1652    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
   1653    defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
   1654                             []>;
   1655    defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
   1656                             [hasAtomAddF64]>;
   1657 }
   1658 
   1659 // atom.{and,or,xor}
        // Scoped bitwise ops for the b32 and b64 type strings; the b64 variant
        // additionally requires hasAtomBitwise64.
   1660 multiclass ATOM2_bitwise_impl<string OpStr> {
   1661    defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   1662    defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
   1663                             [hasAtomBitwise64]>;
   1664 }
   1665 
   1666 // atom.exch
        // Scoped exchange for the b32 and b64 type strings; neither variant adds
        // a predicate of its own (empty Preds list).
   1667 multiclass ATOM2_exch_impl<string OpStr> {
   1668    defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   1669    defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
   1670 }
   1671 
   1672 // atom.{min,max}
        // Scoped min/max for the s32, u32, s64 and u64 type strings; both 64-bit
        // variants additionally require hasAtomMinMax64.
   1673 multiclass ATOM2_minmax_impl<string OpStr> {
   1674    defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
   1675    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
   1676    defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
   1677                             [hasAtomMinMax64]>;
   1678    defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
   1679                             [hasAtomMinMax64]>;
   1680 }
   1681 
   1682 // atom.{inc,dec}
        // Scoped inc/dec; only the u32 type string is instantiated.
   1683 multiclass ATOM2_incdec_impl<string OpStr> {
   1684    defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
   1685 }
   1686 
   1687 // atom.cas
        // Scoped compare-and-swap for the b32 and b64 type strings, built on the
        // three-operand ATOM3S_impl chain; no extra predicates are added here.
   1688 multiclass ATOM3_cas_impl<string OpStr> {
   1689    defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   1690    defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
   1691 }
   1692 
   1693 defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
   1694 defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
        // cas is the only instantiation in this list that goes through the
        // three-operand ATOM3_* chain; every other op uses an ATOM2_* multiclass.
        // The string argument is used both in the asm text and in the intrinsic
        // name built by ATOM2N_impl / ATOM3N_impl.
   1695 defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
   1696 defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
   1697 defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
   1698 defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
   1699 defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
   1700 defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
   1701 defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
   1702 defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
   1703 
   1704 //-----------------------------------
   1705 // Support for ldu on sm_20 or later
   1706 //-----------------------------------
   1707 
   1708 // Don't annotate ldu instructions as mayLoad, as they load from memory that is
   1709 // read-only in a kernel.
   1710 
   1711 // Scalar
   1712 
        // LDU_G defines five instructions for one result register class, differing
        // only in the address operand: a 32-bit register (areg), a 64-bit register
        // (areg64), an imemAny operand (avar), and MEMri / MEMri64 operands
        // (ari / ari64).  The asm text is "ldu.global." followed by TyStr, so TyStr
        // must supply the type suffix and the operand list.  All five have an
        // empty selection-pattern list and require hasLDU.
   1713 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
   1714   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
   1715                !strconcat("ldu.global.", TyStr),
   1716                       []>, Requires<[hasLDU]>;
   1717   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
   1718                !strconcat("ldu.global.", TyStr),
   1719                         []>, Requires<[hasLDU]>;
   1720  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
   1721                !strconcat("ldu.global.", TyStr),
   1722                       []>, Requires<[hasLDU]>;
   1723  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
   1724                !strconcat("ldu.global.", TyStr),
   1725                       []>, Requires<[hasLDU]>;
   1726  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
   1727                !strconcat("ldu.global.", TyStr),
   1728                         []>, Requires<[hasLDU]>;
   1729 }
   1730 
   1731 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;  // i8 result is held in Int16Regs
        // f16 / f16x2 load as untyped b16 / b32; the p32 / p64 entries reuse the
        // u32 / u64 strings and register classes of i32 / i64.
   1732 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
   1733 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
   1734 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
   1735 defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
   1736 defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
   1737 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
   1738 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
   1739 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
   1740 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
   1741 
   1742 // vector
   1743 
   1744 // Elementized vector ldu
   1745 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
          // Two-result ldu.global: $dst1 / $dst2 are separate registers of
          // regclass. One record per address-operand form; no predicate and no
          // selection pattern is attached here.
   1746   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1747                           (ins Int32Regs:$src),
   1748                           "ldu.global." # TyStr, []>;
   1749   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1750                           (ins Int64Regs:$src),
   1751                           "ldu.global." # TyStr, []>;
   1752   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1753                           (ins MEMri:$src),
   1754                           "ldu.global." # TyStr, []>;
   1755   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1756                           (ins MEMri64:$src),
   1757                           "ldu.global." # TyStr, []>;
   1758   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1759                           (ins imemAny:$src),
   1760                           "ldu.global." # TyStr, []>;
   1761 }
   1762 
   1763 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
          // Four-result ldu.global: $dst1..$dst4 are separate registers of
          // regclass. Same five address-operand forms as the two-result variant.
   1764   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1765                                 regclass:$dst3, regclass:$dst4),
   1766                           (ins Int32Regs:$src), "ldu.global." # TyStr, []>;
   1767   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1768                                 regclass:$dst3, regclass:$dst4),
   1769                           (ins Int64Regs:$src), "ldu.global." # TyStr, []>;
   1770   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1771                                 regclass:$dst3, regclass:$dst4),
   1772                           (ins MEMri:$src), "ldu.global." # TyStr, []>;
   1773   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1774                                 regclass:$dst3, regclass:$dst4),
   1775                           (ins MEMri64:$src), "ldu.global." # TyStr, []>;
   1776   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1777                                 regclass:$dst3, regclass:$dst4),
   1778                           (ins imemAny:$src), "ldu.global." # TyStr, []>;
   1779 }
   1780 
   1781 defm INT_PTX_LDU_G_v2i8_ELE  // v2 forms cover 8- to 64-bit element types
   1782   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
   1783 defm INT_PTX_LDU_G_v2i16_ELE
   1784   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
   1785 defm INT_PTX_LDU_G_v2i32_ELE
   1786   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
   1787 defm INT_PTX_LDU_G_v2f16_ELE
   1788   : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
   1789 defm INT_PTX_LDU_G_v2f16x2_ELE
   1790   : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
   1791 defm INT_PTX_LDU_G_v2f32_ELE
   1792   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
   1793 defm INT_PTX_LDU_G_v2i64_ELE
   1794   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
   1795 defm INT_PTX_LDU_G_v2f64_ELE
   1796   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
        // v4 forms stop at 32-bit elements: there are no v4i64 / v4f64 entries.
   1797 defm INT_PTX_LDU_G_v4i8_ELE
   1798   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1799 defm INT_PTX_LDU_G_v4i16_ELE
   1800   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1801     Int16Regs>;
   1802 defm INT_PTX_LDU_G_v4i32_ELE
   1803   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1804     Int32Regs>;
   1805 defm INT_PTX_LDU_G_v4f16_ELE
   1806   : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1807     Float16Regs>;
   1808 defm INT_PTX_LDU_G_v4f16x2_ELE
   1809   : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1810     Float16x2Regs>;
   1811 defm INT_PTX_LDU_G_v4f32_ELE
   1812   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1813     Float32Regs>;
   1814 
   1815 
   1816 //-----------------------------------
   1817 // Support for ldg on sm_35 or later 
   1818 //-----------------------------------
   1819 
   1820 // Don't annotate ld.global.nc as mayLoad, because these loads go through the
   1821 // non-coherent texture cache, and therefore the values read must be read-only
   1822 // during the lifetime of the kernel.
   1823 
   1824 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
          // Scalar ld.global.nc: one record per address-operand form (32/64-bit
          // register, imemAny symbol, MEMri / MEMri64). TyStr supplies the type
          // suffix together with the operand text. Every form is gated on hasLDG
          // and carries no selection pattern.
   1825   def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
   1826                          "ld.global.nc." # TyStr,
   1827                          []>, Requires<[hasLDG]>;
   1828   def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
   1829                          "ld.global.nc." # TyStr,
   1830                          []>, Requires<[hasLDG]>;
   1831   def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
   1832                          "ld.global.nc." # TyStr,
   1833                          []>, Requires<[hasLDG]>;
   1834   def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
   1835                          "ld.global.nc." # TyStr,
   1836                          []>, Requires<[hasLDG]>;
   1837   def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
   1838                          "ld.global.nc." # TyStr,
   1839                          []>, Requires<[hasLDG]>;
   1840 }
   1841 
   1842 defm INT_PTX_LDG_GLOBAL_i8  // scalar ld.global.nc, one defm per value type
   1843   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
   1844 defm INT_PTX_LDG_GLOBAL_i16
   1845   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
   1846 defm INT_PTX_LDG_GLOBAL_i32
   1847   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
   1848 defm INT_PTX_LDG_GLOBAL_i64
   1849   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
        // f16 / f16x2 load as untyped b16 / b32.
   1850 defm INT_PTX_LDG_GLOBAL_f16
   1851   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
   1852 defm INT_PTX_LDG_GLOBAL_f16x2
   1853   : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
   1854 defm INT_PTX_LDG_GLOBAL_f32
   1855   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
   1856 defm INT_PTX_LDG_GLOBAL_f64
   1857   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
        // p32 / p64 reuse the u32 / u64 strings and register classes of i32 / i64.
   1858 defm INT_PTX_LDG_GLOBAL_p32
   1859   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
   1860 defm INT_PTX_LDG_GLOBAL_p64
   1861   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
   1862 
   1863 // vector
   1864 
   1865 // Elementized vector ldg 
   1866 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
          // Two-result ld.global.nc: $dst1 / $dst2 are separate registers of
          // regclass. One record per address-operand form; no predicate and no
          // selection pattern is attached here.
   1867   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1868                           (ins Int32Regs:$src),
   1869                           "ld.global.nc." # TyStr, []>;
   1870   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1871                           (ins Int64Regs:$src),
   1872                           "ld.global.nc." # TyStr, []>;
   1873   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1874                           (ins MEMri:$src),
   1875                           "ld.global.nc." # TyStr, []>;
   1876   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1877                           (ins MEMri64:$src),
   1878                           "ld.global.nc." # TyStr, []>;
   1879   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1880                           (ins imemAny:$src),
   1881                           "ld.global.nc." # TyStr, []>;
   1882 }
   1883 
   1884 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
          // Four-result ld.global.nc: $dst1..$dst4 are separate registers of
          // regclass. Same five address-operand forms as the two-result variant.
   1885   def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1886                                 regclass:$dst3, regclass:$dst4),
   1887                           (ins Int32Regs:$src), "ld.global.nc." # TyStr, []>;
   1888   def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1889                                 regclass:$dst3, regclass:$dst4),
   1890                           (ins Int64Regs:$src), "ld.global.nc." # TyStr, []>;
   1891   def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1892                                 regclass:$dst3, regclass:$dst4),
   1893                           (ins MEMri:$src), "ld.global.nc." # TyStr, []>;
   1894   def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1895                                 regclass:$dst3, regclass:$dst4),
   1896                           (ins MEMri64:$src), "ld.global.nc." # TyStr, []>;
   1897   def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
   1898                                 regclass:$dst3, regclass:$dst4),
   1899                           (ins imemAny:$src), "ld.global.nc." # TyStr, []>;
   1900 }
   1901 
   1902 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
        // Until then the u8 forms below produce their results in Int16Regs.
        // v2 forms cover 8- to 64-bit element types.
   1903 defm INT_PTX_LDG_G_v2i8_ELE
   1904   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
   1905 defm INT_PTX_LDG_G_v2i16_ELE
   1906   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
   1907 defm INT_PTX_LDG_G_v2i32_ELE
   1908   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
   1909 defm INT_PTX_LDG_G_v2f16_ELE
   1910   : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
   1911 defm INT_PTX_LDG_G_v2f16x2_ELE
   1912   : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
   1913 defm INT_PTX_LDG_G_v2f32_ELE
   1914   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
   1915 defm INT_PTX_LDG_G_v2i64_ELE
   1916   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
   1917 defm INT_PTX_LDG_G_v2f64_ELE
   1918   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
        // v4 forms stop at 32-bit elements: there are no v4i64 / v4f64 entries.
   1919 defm INT_PTX_LDG_G_v4i8_ELE
   1920   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1921 defm INT_PTX_LDG_G_v4i16_ELE
   1922   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1923 defm INT_PTX_LDG_G_v4i32_ELE
   1924   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
   1925 defm INT_PTX_LDG_G_v4f16_ELE
   1926   : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
   1927 defm INT_PTX_LDG_G_v4f16x2_ELE
   1928   : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
   1929 defm INT_PTX_LDG_G_v4f32_ELE
   1930   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
   1931 
   1932 
   1933 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
           // cvta.<Str> selected from Intrin. _yes / _yes_64 keep the pointer width
           // (32 -> 32, 64 -> 64). _yes_6432 takes a 32-bit source, widens it with
           // cvt.u64.u32 into a scoped %tmp and converts that to a 64-bit result;
           // it is only available under useShortPtr.
   1934    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1935           "cvta." # Str # ".u32 \t$result, $src;",
   1936       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   1937    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1938           "cvta." # Str # ".u64 \t$result, $src;",
   1939       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   1940    def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
   1941           !strconcat("{{ .reg .b64 %tmp;\n\t",
   1942                      "  cvt.u64.u32 \t%tmp, $src;\n\t",
   1943                      "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
   1944       [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
   1945       Requires<[useShortPtr]>;
   1946 }
   1947 
   1948 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
           // cvta.to.<Str> selected from Intrin. _yes / _yes_64 keep the pointer
           // width (32 -> 32, 64 -> 64). _yes_3264 converts a 64-bit source into a
           // scoped 64-bit %tmp and then narrows it with cvt.u32.u64 to a 32-bit
           // result; it is only available under useShortPtr.
   1949    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1950           "cvta.to." # Str # ".u32 \t$result, $src;",
   1951       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   1952    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1953           "cvta.to." # Str # ".u64 \t$result, $src;",
   1954       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   1955    def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
   1956           !strconcat("{{ .reg .b64 %tmp;\n\t",
   1957                      "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
   1958                      "  cvt.u32.u64 \t$result, %tmp; }}"),
   1959       [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
   1960       Requires<[useShortPtr]>;
   1961 }
   1962 
   1963 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;  // <space> -> generic
   1964 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
   1965 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
        // The asm suffix is "const" while the intrinsic name spells "constant".
   1966 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
   1967 
        // generic -> <space>
   1968 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
   1969 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
   1970 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
   1971 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
   1972 
   1973 
   1974 // nvvm.ptr.gen.to.param
        // Selected as a plain register move (mov.u32 / mov.u64) of the source
        // pointer; no cvta instruction is emitted for this conversion.
   1975 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
   1976   (ins Int32Regs:$src),
   1977                         "mov.u32 \t$result, $src;",
   1978                               [(set Int32Regs:$result,
   1979                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
   1980 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
   1981   (ins Int64Regs:$src),
   1982                         "mov.u64 \t$result, $src;",
   1983                               [(set Int64Regs:$result,
   1984                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
   1985 
   1986 
   1987 // nvvm.move intrinsics
   1988 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),  // integer moves use untyped mov.bN
   1989                              "mov.b16 \t$r, $s;",
   1990                              [(set Int16Regs:$r,
   1991                                (int_nvvm_move_i16 Int16Regs:$s))]>;
   1992 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
   1993                              "mov.b32 \t$r, $s;",
   1994                              [(set Int32Regs:$r,
   1995                                (int_nvvm_move_i32 Int32Regs:$s))]>;
   1996 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
   1997                              "mov.b64 \t$r, $s;",
   1998                              [(set Int64Regs:$r,
   1999                                (int_nvvm_move_i64 Int64Regs:$s))]>;
        // Floating-point moves use the typed mov.f32 / mov.f64 forms.
   2000 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
   2001                              "mov.f32 \t$r, $s;",
   2002                              [(set Float32Regs:$r,
   2003                                (int_nvvm_move_float Float32Regs:$s))]>;
   2004 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
   2005                              "mov.f64 \t$r, $s;",
   2006                              [(set Float64Regs:$r,
   2007                                (int_nvvm_move_double Float64Regs:$s))]>;
        // Both pointer moves match the same intrinsic (int_nvvm_move_ptr); the
        // register class of the operand picks the 32- or 64-bit record.
   2008 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
   2009                              "mov.u32 \t$r, $s;",
   2010                              [(set Int32Regs:$r,
   2011                                (int_nvvm_move_ptr Int32Regs:$s))]>;
   2012 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
   2013                              "mov.u64 \t$r, $s;",
   2014                              [(set Int64Regs:$r,
   2015                                (int_nvvm_move_ptr Int64Regs:$s))]>;
   2016 
   2017 // @TODO: Are these actually needed, or will we always just see symbols
   2018 // copied to registers first?
   2019 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
   2020                              "mov.u32 \t$r, $s;",
   2021                              [(set Int32Regs:$r,
   2022                              (int_nvvm_move_ptr texternalsym:$s))]>;
   2023 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
   2024                              "mov.u64 \t$r, $s;",
   2025                              [(set Int64Regs:$r,
   2026                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
   2027 
   2028 
   2029 // MoveParam        %r1, param
   2030 // ptr_local_to_gen %r2, %r1
   2031 // ptr_gen_to_local %r3, %r2
   2032 // ->
   2033 // mov %r1, param
   2034 
   2035 // @TODO: Revisit this.  There is a type
   2036 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
   2037 // instructions are not currently defined. However, we can use the ptr
   2038 // variants and the asm printer will do the right thing.
        // The two patterns below fold local->generic->local applied to a MoveParam
        // of an external symbol into a single nvvm_move_ptr64 / nvvm_move_ptr32,
        // chosen by the result type (i64 or i32).
   2039 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
   2040                 (MoveParam texternalsym:$src)))),
   2041                (nvvm_move_ptr64  texternalsym:$src)>;
   2042 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
   2043                 (MoveParam texternalsym:$src)))),
   2044                (nvvm_move_ptr32  texternalsym:$src)>;
   2045 
   2046 def texsurf_handles  // mov.u64 of an imem operand into a 64-bit register; no selection pattern attached
   2047   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
   2048               "mov.u64 \t$result, $src;", []>;
   2049 
   2050 //-----------------------------------
   2051 // Compiler Error Warn
   2052 // - Just ignore them in codegen
   2053 //-----------------------------------
        // Each record matches the intrinsic for a 32- or 64-bit operand and emits
        // only a "//" comment line as its asm text; the operand $a is not printed.
   2054 
   2055 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
   2056                 "// llvm.nvvm.compiler.warn()",
   2057                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
   2058 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
   2059                 "// llvm.nvvm.compiler.warn()",
   2060                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
   2061 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
   2062                 "// llvm.nvvm.compiler.error()",
   2063                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
   2064 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
   2065                 "// llvm.nvvm.compiler.error()",
   2066                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
   2067 
   2068 
   2069 // isspacep
        // One record per (state space, pointer width). Each takes the address in a
        // 32- or 64-bit register and writes the answer to an Int1Regs register.
        // Only the .const forms carry a predicate (hasPTX31).
   2070 
   2071 def ISSPACEP_CONST_32
   2072   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   2073               "isspacep.const \t$d, $a;",
   2074               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
   2075     Requires<[hasPTX31]>;
   2076 def ISSPACEP_CONST_64
   2077   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   2078               "isspacep.const \t$d, $a;",
   2079               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
   2080     Requires<[hasPTX31]>;
   2081 def ISSPACEP_GLOBAL_32
   2082   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   2083               "isspacep.global \t$d, $a;",
   2084               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
   2085 def ISSPACEP_GLOBAL_64
   2086   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   2087               "isspacep.global \t$d, $a;",
   2088               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
   2089 def ISSPACEP_LOCAL_32
   2090   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   2091               "isspacep.local \t$d, $a;",
   2092               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
   2093 def ISSPACEP_LOCAL_64
   2094   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   2095               "isspacep.local \t$d, $a;",
   2096               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
   2097 def ISSPACEP_SHARED_32
   2098   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   2099               "isspacep.shared \t$d, $a;",
   2100               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
   2101 def ISSPACEP_SHARED_64
   2102   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   2103               "isspacep.shared \t$d, $a;",
   2104               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
   2105 
   2106 
   2107 // Special register reads
        // MOV_SPECIAL copies a SpecialRegs operand into a 32-bit register with
        // mov.b32 and has no pattern of its own; the Pat records below map each
        // int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) onto it with the
        // matching ENVREGN operand.
   2108 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
   2109                             (ins SpecialRegs:$r),
   2110                             "mov.b32 \t$d, $r;", []>;
   2111 
   2112 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
   2113 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
   2114 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
   2115 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
   2116 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
   2117 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
   2118 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
   2119 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
   2120 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
   2121 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
   2122 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
   2123 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
   2124 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
   2125 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
   2126 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
   2127 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
   2128 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
   2129 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
   2130 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
   2131 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
   2132 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
   2133 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
   2134 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
   2135 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
   2136 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
   2137 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
   2138 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
   2139 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
   2140 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
   2141 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
   2142 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
   2143 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
   2144 
   2145 
   2146 // rotate builtin support
        // int_nvvm_rotate_b32 has two lowerings, chosen by predicate:
        //  - hasHWROT32: a single shf.l.wrap.b32 with $src passed as both data
        //    operands (immediate or register amount).
        //  - noHWROT32: the software expansions ROT32imm_sw / ROTL32reg_sw, which
        //    are defined outside this chunk.
   2147 
   2148 def ROTATE_B32_HW_IMM
   2149   : NVPTXInst<(outs Int32Regs:$dst),
   2150               (ins  Int32Regs:$src, i32imm:$amt),
   2151               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
   2152               [(set Int32Regs:$dst,
   2153                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
   2154               Requires<[hasHWROT32]> ;
   2155 
   2156 def ROTATE_B32_HW_REG
   2157   : NVPTXInst<(outs Int32Regs:$dst),
   2158               (ins  Int32Regs:$src, Int32Regs:$amt),
   2159               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
   2160               [(set Int32Regs:$dst,
   2161                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
   2162               Requires<[hasHWROT32]> ;
   2163 
        // Immediate software rotate: passes both amt and SUB_FRM_32(amt) to
        // ROT32imm_sw (SUB_FRM_32 is presumably 32 - amt; defined elsewhere).
   2164 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
   2165           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
   2166       Requires<[noHWROT32]> ;
   2167 
   2168 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
   2169           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
   2170       Requires<[noHWROT32]> ;
   2171 
   2172 let hasSideEffects = 0 in {
          // Split a 64-bit register into one 32-bit half. mov.b64 unpacks $src into
          // a two-element vector; the unwanted element lands in a block-scoped
          // %dummy register. GET_LO_INT64 keeps the first element, GET_HI_INT64
          // the second.
   2173   def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
   2174     !strconcat("{{\n\t",
   2175                ".reg .b32 %dummy;\n\t",
   2176                "mov.b64 \t{$dst,%dummy}, $src;\n\t",
   2177                "}}"),
   2178           []> ;
   2179 
   2180   def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
   2181     !strconcat("{{\n\t",
   2182                ".reg .b32 %dummy;\n\t",
   2183                "mov.b64 \t{%dummy,$dst}, $src;\n\t",
   2184                "}}"),
   2185           []> ;
   2186 }
   2187 
   2188 let hasSideEffects = 0 in {
          // Inverse of the two records above: packs ($lo, $hi) into one 64-bit
          // register with a vector-source mov.b64.
   2189   def PACK_TWO_INT32
   2190     : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
   2191                 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
   2192 }
   2193 
        // swap_lo_hi: the high half is passed as $lo and the low half as $hi.
   2194 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
   2195           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
   2196                           (GET_LO_INT64 Int64Regs:$src))> ;
   2197 
   2198 // Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
   2199 // no side effects.
        // Four pattern-less records: left/right direction x immediate/register
        // amount. All take ($lo, $hi, $amt) and are gated on hasHWROT32; they are
        // used as building blocks by the 64-bit rotate patterns.
   2200 let hasSideEffects = 0 in {
   2201   def SHF_L_WRAP_B32_IMM
   2202     : NVPTXInst<(outs Int32Regs:$dst),
   2203                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
   2204                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   2205       Requires<[hasHWROT32]>;
   2206 
   2207   def SHF_L_WRAP_B32_REG
   2208     : NVPTXInst<(outs Int32Regs:$dst),
   2209                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
   2210                 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   2211       Requires<[hasHWROT32]>;
   2212 
   2213   def SHF_R_WRAP_B32_IMM
   2214     : NVPTXInst<(outs Int32Regs:$dst),
   2215                 (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
   2216                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   2217       Requires<[hasHWROT32]>;
   2218 
   2219   def SHF_R_WRAP_B32_REG
   2220     : NVPTXInst<(outs Int32Regs:$dst),
   2221                 (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
   2222                 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   2223       Requires<[hasHWROT32]>;
   2224 }
   2225 
   2226 // HW version of rotate 64
        // The 64-bit value is split with GET_LO_INT64 / GET_HI_INT64, each result
        // half is produced by one 32-bit funnel shift over the two halves, and the
        // halves are rejoined with PACK_TWO_INT32 (first operand = $lo).
        // NOTE(review): shf.*.wrap presumably uses only the low 5 bits of the
        // amount, so amounts in [32, 63] would rotate by amt - 32 without swapping
        // the halves -- confirm against the PTX ISA and the intrinsic's contract.
   2227 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
   2228           (PACK_TWO_INT32
   2229             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
   2230                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
   2231             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
   2232                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
   2233       Requires<[hasHWROT32]>;
   2234 
   2235 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
   2236           (PACK_TWO_INT32
   2237             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
   2238                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
   2239             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
   2240                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
   2241       Requires<[hasHWROT32]>;
   2242 
   2243 
        // Rotate right: same structure with shf.r and the half order mirrored.
   2244 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
   2245           (PACK_TWO_INT32
   2246             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
   2247                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
   2248             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
   2249                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
   2250       Requires<[hasHWROT32]>;
   2251 
   2252 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
   2253           (PACK_TWO_INT32
   2254             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
   2255                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
   2256             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
   2257                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
   2258       Requires<[hasHWROT32]>;
   2259 
   2260 // SW version of rotate 64
        // Used when the target has no 32-bit hardware rotate (noHWROT32).
        // For immediate amounts ROT64imm_sw receives the amount and its complement.
        // For a 64-bit rotate the complement must come from SUB_FRM_64 (presumably
        // 64 - amt; defined outside this chunk) in both directions: rotate-left
        // passes (amt, SUB_FRM_64 amt), rotate-right passes (SUB_FRM_64 amt, amt).
        // The rotate-left pattern previously used SUB_FRM_32, which does not pair
        // with a 64-bit shift and disagreed with the rotate-right pattern.
   2261 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
   2262           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
   2263       Requires<[noHWROT32]>;
   2264 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
   2265           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
   2266       Requires<[noHWROT32]>;
   2267 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
   2268           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
   2269       Requires<[noHWROT32]>;
   2270 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
   2271           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
   2272       Requires<[noHWROT32]>;
   2273 
   2274 
   2275 //-----------------------------------
   2276 // Texture Intrinsics
   2277 //-----------------------------------
   2278 
   2279 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
   2280 // also defined in NVPTXReplaceImageHandles.cpp
   2281 
   2282 // texmode_independent
   2283 let IsTex = 1, IsTexModeUnified = 0 in {
   2284 // Texture fetch instructions using handles
   2285 def TEX_1D_F32_S32
   2286   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2287                     Float32Regs:$b, Float32Regs:$a),
   2288               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   2289               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2290               []>;
   2291 def TEX_1D_F32_F32
   2292   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2293                     Float32Regs:$b, Float32Regs:$a),
   2294               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   2295               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2296               []>;
   2297 def TEX_1D_F32_F32_LEVEL
   2298   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2299                     Float32Regs:$b, Float32Regs:$a),
   2300               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
   2301               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2302               "[$t, $s, \\{$x\\}], $lod;",
   2303               []>;
   // Prints `tex.grad.1d.v4.f32.f32`: four f32 results, f32 coord $x, followed
   // by $gradx and $grady, each printed as a one-element braced vector.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2304 def TEX_1D_F32_F32_GRAD
   2305   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2306                     Float32Regs:$b, Float32Regs:$a),
   2307               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2308                    Float32Regs:$gradx, Float32Regs:$grady),
   2309               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2310               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2311               []>;
   // Prints `tex.1d.v4.s32.s32`: four s32 results in Int32Regs, s32 coord $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2312 def TEX_1D_S32_S32
   2313   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2314                     Int32Regs:$b, Int32Regs:$a),
   2315               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   2316               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2317               []>;
   // Prints `tex.1d.v4.s32.f32`: four s32 results in Int32Regs, f32 coord $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2318 def TEX_1D_S32_F32
   2319   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2320                     Int32Regs:$b, Int32Regs:$a),
   2321               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   2322               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2323               []>;
   // Prints `tex.level.1d.v4.s32.f32`: four s32 results, f32 coord $x, and a
   // trailing f32 $lod (presumably the explicit level of detail -- confirm).
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2324 def TEX_1D_S32_F32_LEVEL
   2325   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2326                     Int32Regs:$b, Int32Regs:$a),
   2327               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2328                    Float32Regs:$lod),
   2329               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2330               "[$t, $s, \\{$x\\}], $lod;",
   2331               []>;
   // Prints `tex.grad.1d.v4.s32.f32`: four s32 results, f32 coord $x, followed
   // by $gradx and $grady, each printed as a one-element braced vector.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2332 def TEX_1D_S32_F32_GRAD
   2333   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2334                     Int32Regs:$b, Int32Regs:$a),
   2335               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2336                    Float32Regs:$gradx, Float32Regs:$grady),
   2337               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2338               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2339               []>;
   // Prints `tex.1d.v4.u32.s32`: four u32 results in Int32Regs, s32 coord $x.
   // Differs from the S32-result record only in the `.u32` type in the asm.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2340 def TEX_1D_U32_S32
   2341   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2342                     Int32Regs:$b, Int32Regs:$a),
   2343               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   2344               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2345               []>;
   // Prints `tex.1d.v4.u32.f32`: four u32 results in Int32Regs, f32 coord $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2346 def TEX_1D_U32_F32
   2347   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2348                     Int32Regs:$b, Int32Regs:$a),
   2349               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   2350               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2351               []>;
   // Prints `tex.level.1d.v4.u32.f32`: four u32 results, f32 coord $x, and a
   // trailing f32 $lod (presumably the explicit level of detail -- confirm).
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2352 def TEX_1D_U32_F32_LEVEL
   2353   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2354                     Int32Regs:$b, Int32Regs:$a),
   2355               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2356                    Float32Regs:$lod),
   2357               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2358               "[$t, $s, \\{$x\\}], $lod;",
   2359               []>;
   // Prints `tex.grad.1d.v4.u32.f32`: four u32 results, f32 coord $x, followed
   // by $gradx and $grady, each printed as a one-element braced vector.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2360 def TEX_1D_U32_F32_GRAD
   2361   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2362                     Int32Regs:$b, Int32Regs:$a),
   2363               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2364                    Float32Regs:$gradx, Float32Regs:$grady),
   2365               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2366               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2367               []>;
   2368 
   // Prints `tex.a1d.v4.f32.s32`: four f32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), s32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2369 def TEX_1D_ARRAY_F32_S32
   2370   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2371                     Float32Regs:$b, Float32Regs:$a),
   2372               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2373               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   2374               "[$t, $s, \\{$l, $x\\}];",
   2375               []>;
   // Prints `tex.a1d.v4.f32.f32`: four f32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), f32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2376 def TEX_1D_ARRAY_F32_F32
   2377   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2378                     Float32Regs:$b, Float32Regs:$a),
   2379               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2380               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2381               "[$t, $s, \\{$l, $x\\}];",
   2382               []>;
   // Prints `tex.level.a1d.v4.f32.f32`: four f32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then a trailing f32
   // $lod (presumably the explicit level of detail -- confirm).
   2383 def TEX_1D_ARRAY_F32_F32_LEVEL
   2384   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2385                     Float32Regs:$b, Float32Regs:$a),
   2386               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2387                    Float32Regs:$lod),
   2388               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2389               "[$t, $s, \\{$l, $x\\}], $lod;",
   2390               []>;
   // Prints `tex.grad.a1d.v4.f32.f32`: four f32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then $gradx and
   // $grady, each printed as a one-element braced vector.
   2391 def TEX_1D_ARRAY_F32_F32_GRAD
   2392   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2393                     Float32Regs:$b, Float32Regs:$a),
   2394               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2395                    Float32Regs:$gradx, Float32Regs:$grady),
   2396               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2397               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2398               []>;
   // Prints `tex.a1d.v4.s32.s32`: four s32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), s32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2399 def TEX_1D_ARRAY_S32_S32
   2400   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2401                     Int32Regs:$b, Int32Regs:$a),
   2402               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2403               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   2404               "[$t, $s, \\{$l, $x\\}];",
   2405               []>;
   // Prints `tex.a1d.v4.s32.f32`: four s32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), f32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2406 def TEX_1D_ARRAY_S32_F32
   2407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2408                     Int32Regs:$b, Int32Regs:$a),
   2409               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2410               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2411               "[$t, $s, \\{$l, $x\\}];",
   2412               []>;
   // Prints `tex.level.a1d.v4.s32.f32`: four s32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then a trailing f32
   // $lod (presumably the explicit level of detail -- confirm).
   2413 def TEX_1D_ARRAY_S32_F32_LEVEL
   2414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2415                     Int32Regs:$b, Int32Regs:$a),
   2416               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2417                    Float32Regs:$lod),
   2418               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2419               "[$t, $s, \\{$l, $x\\}], $lod;",
   2420               []>;
   // Prints `tex.grad.a1d.v4.s32.f32`: four s32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then $gradx and
   // $grady, each printed as a one-element braced vector.
   2421 def TEX_1D_ARRAY_S32_F32_GRAD
   2422   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2423                     Int32Regs:$b, Int32Regs:$a),
   2424               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2425                    Float32Regs:$gradx, Float32Regs:$grady),
   2426               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2427               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2428               []>;
   // Prints `tex.a1d.v4.u32.s32`: four u32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), s32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2429 def TEX_1D_ARRAY_U32_S32
   2430   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2431                     Int32Regs:$b, Int32Regs:$a),
   2432               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2433               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   2434               "[$t, $s, \\{$l, $x\\}];",
   2435               []>;
   // Prints `tex.a1d.v4.u32.f32`: four u32 results, coordinate vector {$l, $x}
   // with Int32 $l first (presumably the array layer index -- confirm), f32 $x.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   2436 def TEX_1D_ARRAY_U32_F32
   2437   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2438                     Int32Regs:$b, Int32Regs:$a),
   2439               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2440               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2441               "[$t, $s, \\{$l, $x\\}];",
   2442               []>;
   // Prints `tex.level.a1d.v4.u32.f32`: four u32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then a trailing f32
   // $lod (presumably the explicit level of detail -- confirm).
   2443 def TEX_1D_ARRAY_U32_F32_LEVEL
   2444   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2445                     Int32Regs:$b, Int32Regs:$a),
   2446               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2447                    Float32Regs:$lod),
   2448               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2449               "[$t, $s, \\{$l, $x\\}], $lod;",
   2450               []>;
   // Prints `tex.grad.a1d.v4.u32.f32`: four u32 results, coordinates {$l, $x}
   // (Int32 $l, presumably the array layer -- confirm), then $gradx and
   // $grady, each printed as a one-element braced vector.
   2451 def TEX_1D_ARRAY_U32_F32_GRAD
   2452   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2453                     Int32Regs:$b, Int32Regs:$a),
   2454               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2455                    Float32Regs:$gradx, Float32Regs:$grady),
   2456               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2457               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2458               []>;
   2459 
   // Prints `tex.2d.v4.f32.s32`: four f32 results, s32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2460 def TEX_2D_F32_S32
   2461   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2462                     Float32Regs:$b, Float32Regs:$a),
   2463               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2464               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   2465               "[$t, $s, \\{$x, $y\\}];",
   2466               []>;
   // Prints `tex.2d.v4.f32.f32`: four f32 results, f32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2467 def TEX_2D_F32_F32
   2468   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2469                     Float32Regs:$b, Float32Regs:$a),
   2470               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2471               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2472               "[$t, $s, \\{$x, $y\\}];",
   2473               []>;
   // Prints `tex.level.2d.v4.f32.f32`: four f32 results, f32 coordinates
   // {$x, $y}, and a trailing f32 $lod (presumably the explicit level of
   // detail -- confirm).
   2474 def TEX_2D_F32_F32_LEVEL
   2475   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2476                     Float32Regs:$b, Float32Regs:$a),
   2477               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2478                    Float32Regs:$lod),
   2479               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2480               "[$t, $s, \\{$x, $y\\}], $lod;",
   2481               []>;
   // Prints `tex.grad.2d.v4.f32.f32`: four f32 results, f32 coordinates
   // {$x, $y}, followed by two two-element gradient vectors
   // {$gradx0, $gradx1} and {$grady0, $grady1}.
   2482 def TEX_2D_F32_F32_GRAD
   2483   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2484                     Float32Regs:$b, Float32Regs:$a),
   2485               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2486                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2487                    Float32Regs:$grady0, Float32Regs:$grady1),
   2488               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2489               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2490               "\\{$grady0, $grady1\\};",
   2491               []>;
   // Prints `tex.2d.v4.s32.s32`: four s32 results, s32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2492 def TEX_2D_S32_S32
   2493   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2494                     Int32Regs:$b, Int32Regs:$a),
   2495               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2496               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   2497               "[$t, $s, \\{$x, $y\\}];",
   2498               []>;
   // Prints `tex.2d.v4.s32.f32`: four s32 results, f32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2499 def TEX_2D_S32_F32
   2500   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2501                     Int32Regs:$b, Int32Regs:$a),
   2502               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2503               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2504               "[$t, $s, \\{$x, $y\\}];",
   2505               []>;
   // Prints `tex.level.2d.v4.s32.f32`: four s32 results, f32 coordinates
   // {$x, $y}, and a trailing f32 $lod (presumably the explicit level of
   // detail -- confirm).
   2506 def TEX_2D_S32_F32_LEVEL
   2507   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2508                     Int32Regs:$b, Int32Regs:$a),
   2509               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2510                    Float32Regs:$lod),
   2511               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2512               "[$t, $s, \\{$x, $y\\}], $lod;",
   2513               []>;
   // Prints `tex.grad.2d.v4.s32.f32`: four s32 results, f32 coordinates
   // {$x, $y}, followed by two two-element gradient vectors
   // {$gradx0, $gradx1} and {$grady0, $grady1}.
   2514 def TEX_2D_S32_F32_GRAD
   2515   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2516                     Int32Regs:$b, Int32Regs:$a),
   2517               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2518                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2519                    Float32Regs:$grady0, Float32Regs:$grady1),
   2520               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2521               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2522               "\\{$grady0, $grady1\\};",
   2523               []>;
   // Prints `tex.2d.v4.u32.s32`: four u32 results, s32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2524 def TEX_2D_U32_S32
   2525   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2526                     Int32Regs:$b, Int32Regs:$a),
   2527               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2528               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   2529               "[$t, $s, \\{$x, $y\\}];",
   2530               []>;
   // Prints `tex.2d.v4.u32.f32`: four u32 results, f32 coordinates {$x, $y}.
   // NOTE(review): $t/$s look like texture/sampler handles -- confirm.
   // The selection pattern list is empty ([]).
   2531 def TEX_2D_U32_F32
   2532   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2533                     Int32Regs:$b, Int32Regs:$a),
   2534               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2535               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2536               "[$t, $s, \\{$x, $y\\}];",
   2537               []>;
   // Prints `tex.level.2d.v4.u32.f32`: four u32 results, f32 coordinates
   // {$x, $y}, and a trailing f32 $lod (presumably the explicit level of
   // detail -- confirm).
   2538 def TEX_2D_U32_F32_LEVEL
   2539   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2540                     Int32Regs:$b, Int32Regs:$a),
   2541               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2542                    Float32Regs:$lod),
   2543               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2544               "[$t, $s, \\{$x, $y\\}], $lod;",
   2545               []>;
   // Prints `tex.grad.2d.v4.u32.f32`: four u32 results, f32 coordinates
   // {$x, $y}, followed by two two-element gradient vectors
   // {$gradx0, $gradx1} and {$grady0, $grady1}.
   2546 def TEX_2D_U32_F32_GRAD
   2547   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2548                     Int32Regs:$b, Int32Regs:$a),
   2549               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2550                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2551                    Float32Regs:$grady0, Float32Regs:$grady1),
   2552               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2553               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2554               "\\{$grady0, $grady1\\};",
   2555               []>;
   2556 
   // Prints `tex.a2d.v4.f32.s32`: four f32 results, Int32 $l then s32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2557 def TEX_2D_ARRAY_F32_S32
   2558   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2559                     Float32Regs:$b, Float32Regs:$a),
   2560               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2561                    Int32Regs:$y),
   2562               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   2563               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2564               []>;
   // Prints `tex.a2d.v4.f32.f32`: four f32 results, Int32 $l then f32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2565 def TEX_2D_ARRAY_F32_F32
   2566   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2567                     Float32Regs:$b, Float32Regs:$a),
   2568               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2569                    Float32Regs:$y),
   2570               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2571               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2572               []>;
   // Prints `tex.level.a2d.v4.f32.f32`: four f32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2573 def TEX_2D_ARRAY_F32_F32_LEVEL
   2574   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2575                     Float32Regs:$b, Float32Regs:$a),
   2576               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2577                    Float32Regs:$y, Float32Regs:$lod),
   2578               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2579               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2580               []>;
   // Prints `tex.grad.a2d.v4.f32.f32`: four f32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), then
   // gradient vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
   2581 def TEX_2D_ARRAY_F32_F32_GRAD
   2582   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2583                     Float32Regs:$b, Float32Regs:$a),
   2584               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2585                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
   2586                    Float32Regs:$grady0, Float32Regs:$grady1),
   2587               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2588               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2589               "\\{$grady0, $grady1\\};",
   2590               []>;
   // Prints `tex.a2d.v4.s32.s32`: four s32 results, Int32 $l then s32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2591 def TEX_2D_ARRAY_S32_S32
   2592   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2593                     Int32Regs:$b, Int32Regs:$a),
   2594               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2595                    Int32Regs:$y),
   2596               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   2597               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2598               []>;
   // Prints `tex.a2d.v4.s32.f32`: four s32 results, Int32 $l then f32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2599 def TEX_2D_ARRAY_S32_F32
   2600   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2601                     Int32Regs:$b, Int32Regs:$a),
   2602               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2603                    Float32Regs:$y),
   2604               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2605               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2606               []>;
   // Prints `tex.level.a2d.v4.s32.f32`: four s32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2607 def TEX_2D_ARRAY_S32_F32_LEVEL
   2608   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2609                     Int32Regs:$b, Int32Regs:$a),
   2610               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2611                    Float32Regs:$y, Float32Regs:$lod),
   2612               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2613               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2614               []>;
   // Prints `tex.grad.a2d.v4.s32.f32`: four s32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), then
   // gradient vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
   2615 def TEX_2D_ARRAY_S32_F32_GRAD
   2616   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2617                     Int32Regs:$b, Int32Regs:$a),
   2618               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2619                    Float32Regs:$y,
   2620                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2621                    Float32Regs:$grady0, Float32Regs:$grady1),
   2622               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2623               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2624               "\\{$grady0, $grady1\\};",
   2625               []>;
   // Prints `tex.a2d.v4.u32.s32`: four u32 results, Int32 $l then s32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2626 def TEX_2D_ARRAY_U32_S32
   2627   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2628                     Int32Regs:$b, Int32Regs:$a),
   2629               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2630                    Int32Regs:$y),
   2631               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   2632               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2633               []>;
   // Prints `tex.a2d.v4.u32.f32`: four u32 results, Int32 $l then f32 $x, $y.
   // The coordinate vector is printed as {$l, $x, $y, $y}; $y fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2634 def TEX_2D_ARRAY_U32_F32
   2635   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2636                     Int32Regs:$b, Int32Regs:$a),
   2637               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2638                    Float32Regs:$y),
   2639               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2640               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2641               []>;
   // Prints `tex.level.a2d.v4.u32.f32`: four u32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2642 def TEX_2D_ARRAY_U32_F32_LEVEL
   2643   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2644                     Int32Regs:$b, Int32Regs:$a),
   2645               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2646                    Float32Regs:$y, Float32Regs:$lod),
   2647               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2648               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2649               []>;
   // Prints `tex.grad.a2d.v4.u32.f32`: four u32 results, coordinates printed
   // as {$l, $x, $y, $y} ($y repeated, presumably as padding -- confirm), then
   // gradient vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
   2650 def TEX_2D_ARRAY_U32_F32_GRAD
   2651   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2652                     Int32Regs:$b, Int32Regs:$a),
   2653               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2654                    Float32Regs:$y,
   2655                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2656                    Float32Regs:$grady0, Float32Regs:$grady1),
   2657               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2658               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2659               "\\{$grady0, $grady1\\};",
   2660               []>;
   2661 
   // Prints `tex.3d.v4.f32.s32`: four f32 results, s32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2662 def TEX_3D_F32_S32
   2663   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2664                     Float32Regs:$b, Float32Regs:$a),
   2665               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2666                    Int32Regs:$z),
   2667               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   2668               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2669               []>;
   // Prints `tex.3d.v4.f32.f32`: four f32 results, f32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2670 def TEX_3D_F32_F32
   2671   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2672                     Float32Regs:$b, Float32Regs:$a),
   2673               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2674                    Float32Regs:$z),
   2675               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2676               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2677               []>;
   // Prints `tex.level.3d.v4.f32.f32`: four f32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2678 def TEX_3D_F32_F32_LEVEL
   2679   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2680                     Float32Regs:$b, Float32Regs:$a),
   2681               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2682                    Float32Regs:$z, Float32Regs:$lod),
   2683               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2684               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2685               []>;
   // Prints `tex.grad.3d.v4.f32.f32`: four f32 results, coordinates printed as
   // {$x, $y, $z, $z}, then two gradient vectors. Each vector repeats its
   // third element ($z, $gradx2, $grady2) in the fourth slot (presumably
   // padding to a 4-element vector -- confirm vs. PTX ISA).
   2686 def TEX_3D_F32_F32_GRAD
   2687   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2688                     Float32Regs:$b, Float32Regs:$a),
   2689               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2690                    Float32Regs:$z,
   2691                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2692                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2693                    Float32Regs:$grady1, Float32Regs:$grady2),
   2694               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2695               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2696               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2697               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2698               []>;
   // Prints `tex.3d.v4.s32.s32`: four s32 results, s32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2699 def TEX_3D_S32_S32
   2700   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2701                     Int32Regs:$b, Int32Regs:$a),
   2702               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2703                    Int32Regs:$z),
   2704               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   2705               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2706               []>;
   // Prints `tex.3d.v4.s32.f32`: four s32 results, f32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2707 def TEX_3D_S32_F32
   2708   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2709                     Int32Regs:$b, Int32Regs:$a),
   2710               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2711                    Float32Regs:$z),
   2712               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2713               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2714               []>;
   // Prints `tex.level.3d.v4.s32.f32`: four s32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2715 def TEX_3D_S32_F32_LEVEL
   2716   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2717                     Int32Regs:$b, Int32Regs:$a),
   2718               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2719                    Float32Regs:$z, Float32Regs:$lod),
   2720               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2721               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2722               []>;
   // Prints `tex.grad.3d.v4.s32.f32`: four s32 results, coordinates printed as
   // {$x, $y, $z, $z}, then two gradient vectors. Each vector repeats its
   // third element ($z, $gradx2, $grady2) in the fourth slot (presumably
   // padding to a 4-element vector -- confirm vs. PTX ISA).
   2723 def TEX_3D_S32_F32_GRAD
   2724   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2725                     Int32Regs:$b, Int32Regs:$a),
   2726               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2727                    Float32Regs:$z,
   2728                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2729                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2730                    Float32Regs:$grady1, Float32Regs:$grady2),
   2731               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2732               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2733               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2734               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2735               []>;
   // Prints `tex.3d.v4.u32.s32`: four u32 results, s32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2736 def TEX_3D_U32_S32
   2737   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2738                     Int32Regs:$b, Int32Regs:$a),
   2739               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2740                    Int32Regs:$z),
   2741               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   2742               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2743               []>;
   // Prints `tex.3d.v4.u32.f32`: four u32 results, f32 coordinates $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2744 def TEX_3D_U32_F32
   2745   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2746                     Int32Regs:$b, Int32Regs:$a),
   2747               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2748                    Float32Regs:$z),
   2749               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2750               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2751               []>;
   // Prints `tex.level.3d.v4.u32.f32`: four u32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2752 def TEX_3D_U32_F32_LEVEL
   2753   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2754                     Int32Regs:$b, Int32Regs:$a),
   2755               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2756                    Float32Regs:$z, Float32Regs:$lod),
   2757               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2758               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2759               []>;
   // Prints `tex.grad.3d.v4.u32.f32`: four u32 results, coordinates printed as
   // {$x, $y, $z, $z}, then two gradient vectors. Each vector repeats its
   // third element ($z, $gradx2, $grady2) in the fourth slot (presumably
   // padding to a 4-element vector -- confirm vs. PTX ISA).
   2760 def TEX_3D_U32_F32_GRAD
   2761   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2762                     Int32Regs:$b, Int32Regs:$a),
   2763               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2764                    Float32Regs:$z,
   2765                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2766                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2767                    Float32Regs:$grady1, Float32Regs:$grady2),
   2768               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2769               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2770               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2771               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2772               []>;
   2773 
   // Prints `tex.cube.v4.f32.f32`: four f32 results, f32 coords $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2774 def TEX_CUBE_F32_F32
   2775   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2776                     Float32Regs:$b, Float32Regs:$a),
   2777               (ins Int64Regs:$t, Int64Regs:$s,
   2778                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2779               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2780               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2781               []>;
   // Prints `tex.level.cube.v4.f32.f32`: four f32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2782 def TEX_CUBE_F32_F32_LEVEL
   2783   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2784                     Float32Regs:$b, Float32Regs:$a),
   2785               (ins Int64Regs:$t, Int64Regs:$s,
   2786                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2787                    Float32Regs:$lod),
   2788               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2789               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2790               []>;
   // Prints `tex.cube.v4.s32.f32`: four s32 results, f32 coords $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2791 def TEX_CUBE_S32_F32
   2792   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2793                     Int32Regs:$b, Int32Regs:$a),
   2794               (ins Int64Regs:$t, Int64Regs:$s,
   2795                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2796               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2797               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2798               []>;
   // Prints `tex.level.cube.v4.s32.f32`: four s32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2799 def TEX_CUBE_S32_F32_LEVEL
   2800   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2801                     Int32Regs:$b, Int32Regs:$a),
   2802               (ins Int64Regs:$t, Int64Regs:$s,
   2803                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2804                    Float32Regs:$lod),
   2805               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2806               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2807               []>;
   // Prints `tex.cube.v4.u32.f32`: four u32 results, f32 coords $x, $y, $z.
   // The coordinate vector is printed as {$x, $y, $z, $z}; $z fills the fourth
   // slot (presumably padding to a 4-element vector -- confirm vs. PTX ISA).
   2808 def TEX_CUBE_U32_F32
   2809   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2810                     Int32Regs:$b, Int32Regs:$a),
   2811               (ins Int64Regs:$t, Int64Regs:$s,
   2812                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2813               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2814               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2815               []>;
   // Prints `tex.level.cube.v4.u32.f32`: four u32 results, coordinates printed
   // as {$x, $y, $z, $z} ($z repeated, presumably as padding -- confirm), and
   // a trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2816 def TEX_CUBE_U32_F32_LEVEL
   2817   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2818                     Int32Regs:$b, Int32Regs:$a),
   2819               (ins Int64Regs:$t, Int64Regs:$s,
   2820                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2821                    Float32Regs:$lod),
   2822               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2823               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2824               []>;
   2825 
   // Prints `tex.acube.v4.f32.f32`: four f32 results, coordinate vector
   // {$l, $x, $y, $z} with Int32 $l first (presumably the array layer index
   // -- confirm) and f32 $x, $y, $z.
   2826 def TEX_CUBE_ARRAY_F32_F32
   2827   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2828                     Float32Regs:$b, Float32Regs:$a),
   2829               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2830                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2831               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2832               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2833               []>;
   // Prints `tex.level.acube.v4.f32.f32`: four f32 results, coordinate vector
   // {$l, $x, $y, $z} (Int32 $l, presumably the array layer -- confirm), and a
   // trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2834 def TEX_CUBE_ARRAY_F32_F32_LEVEL
   2835   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2836                     Float32Regs:$b, Float32Regs:$a),
   2837               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2838                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2839                    Float32Regs:$lod),
   2840               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2841               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2842               []>;
   // Prints `tex.acube.v4.s32.f32`: four s32 results, coordinate vector
   // {$l, $x, $y, $z} with Int32 $l first (presumably the array layer index
   // -- confirm) and f32 $x, $y, $z.
   2843 def TEX_CUBE_ARRAY_S32_F32
   2844   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2845                     Int32Regs:$b, Int32Regs:$a),
   2846               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2847                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2848               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2849               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2850               []>;
   // Prints `tex.level.acube.v4.s32.f32`: four s32 results, coordinate vector
   // {$l, $x, $y, $z} (Int32 $l, presumably the array layer -- confirm), and a
   // trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2851 def TEX_CUBE_ARRAY_S32_F32_LEVEL
   2852   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2853                     Int32Regs:$b, Int32Regs:$a),
   2854               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2855                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2856                    Float32Regs:$lod),
   2857               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   2858               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2859               []>;
   // Prints `tex.acube.v4.u32.f32`: four u32 results, coordinate vector
   // {$l, $x, $y, $z} with Int32 $l first (presumably the array layer index
   // -- confirm) and f32 $x, $y, $z.
   2860 def TEX_CUBE_ARRAY_U32_F32
   2861   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2862                     Int32Regs:$b, Int32Regs:$a),
   2863               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2864                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2865               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2866               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2867               []>;
   // Prints `tex.level.acube.v4.u32.f32`: four u32 results, coordinate vector
   // {$l, $x, $y, $z} (Int32 $l, presumably the array layer -- confirm), and a
   // trailing f32 $lod (presumably the explicit level of detail -- confirm).
   2868 def TEX_CUBE_ARRAY_U32_F32_LEVEL
   2869   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2870                     Int32Regs:$b, Int32Regs:$a),
   2871               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2872                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2873                    Float32Regs:$lod),
   2874               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   2875               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2876               []>;
   2877 
   // Prints `tld4.r.2d.v4.f32.f32`: four f32 results $v0..$v3, f32 coordinates
   // {$x, $y}. NOTE(review): `.r` presumably selects the R component for a
   // four-texel gather -- confirm against the PTX ISA `tld4` description.
   2878 def TLD4_R_2D_F32_F32
   2879   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2880                     Float32Regs:$v2, Float32Regs:$v3),
   2881               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2882               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
   2883               "[$t, $s, \\{$x, $y\\}];",
   2884               []>;
   // Prints `tld4.g.2d.v4.f32.f32`: four f32 results $v0..$v3, f32 coordinates
   // {$x, $y}. NOTE(review): `.g` presumably selects the G component for a
   // four-texel gather -- confirm against the PTX ISA `tld4` description.
   2885 def TLD4_G_2D_F32_F32
   2886   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2887                     Float32Regs:$v2, Float32Regs:$v3),
   2888               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2889               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
   2890               "[$t, $s, \\{$x, $y\\}];",
   2891               []>;
   2892 def TLD4_B_2D_F32_F32  // tld4.b.2d with separate $t and $s operands: f32 results $v0..$v3, f32 (x, y) coordinates.
   2893   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2894                     Float32Regs:$v2, Float32Regs:$v3),
   2895               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2896               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" presumably selects the blue component (confirm against the PTX ISA).
   2897               "[$t, $s, \\{$x, $y\\}];",
   2898               []>;  // Empty pattern list.
   2899 def TLD4_A_2D_F32_F32  // tld4.a.2d with separate $t and $s operands: f32 results $v0..$v3, f32 (x, y) coordinates.
   2900   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2901                     Float32Regs:$v2, Float32Regs:$v3),
   2902               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2903               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" presumably selects the alpha component (confirm against the PTX ISA).
   2904               "[$t, $s, \\{$x, $y\\}];",
   2905               []>;  // Empty pattern list.
   2906 def TLD4_R_2D_S32_F32  // tld4.r.2d with separate $t and $s operands: s32 results $v0..$v3, f32 (x, y) coordinates.
   2907   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2908                     Int32Regs:$v2, Int32Regs:$v3),
   2909               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2910               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".r" presumably selects the red component (confirm against the PTX ISA).
   2911               "[$t, $s, \\{$x, $y\\}];",
   2912               []>;  // Empty pattern list.
   2913 def TLD4_G_2D_S32_F32  // tld4.g.2d with separate $t and $s operands: s32 results $v0..$v3, f32 (x, y) coordinates.
   2914   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2915                     Int32Regs:$v2, Int32Regs:$v3),
   2916               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2917               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".g" presumably selects the green component (confirm against the PTX ISA).
   2918               "[$t, $s, \\{$x, $y\\}];",
   2919               []>;  // Empty pattern list.
   2920 def TLD4_B_2D_S32_F32  // tld4.b.2d with separate $t and $s operands: s32 results $v0..$v3, f32 (x, y) coordinates.
   2921   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2922                     Int32Regs:$v2, Int32Regs:$v3),
   2923               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2924               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" presumably selects the blue component (confirm against the PTX ISA).
   2925               "[$t, $s, \\{$x, $y\\}];",
   2926               []>;  // Empty pattern list.
   2927 def TLD4_A_2D_S32_F32  // tld4.a.2d with separate $t and $s operands: s32 results $v0..$v3, f32 (x, y) coordinates.
   2928   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2929                     Int32Regs:$v2, Int32Regs:$v3),
   2930               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2931               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" presumably selects the alpha component (confirm against the PTX ISA).
   2932               "[$t, $s, \\{$x, $y\\}];",
   2933               []>;  // Empty pattern list.
   2934 def TLD4_R_2D_U32_F32  // tld4.r.2d with separate $t and $s operands: u32 results $v0..$v3, f32 (x, y) coordinates.
   2935   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2936                     Int32Regs:$v2, Int32Regs:$v3),
   2937               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2938               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".r" presumably selects the red component (confirm against the PTX ISA).
   2939               "[$t, $s, \\{$x, $y\\}];",
   2940               []>;  // Empty pattern list.
   2941 def TLD4_G_2D_U32_F32  // tld4.g.2d with separate $t and $s operands: u32 results $v0..$v3, f32 (x, y) coordinates.
   2942   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2943                     Int32Regs:$v2, Int32Regs:$v3),
   2944               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2945               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".g" presumably selects the green component (confirm against the PTX ISA).
   2946               "[$t, $s, \\{$x, $y\\}];",
   2947               []>;  // Empty pattern list.
   2948 def TLD4_B_2D_U32_F32  // tld4.b.2d with separate $t and $s operands: u32 results $v0..$v3, f32 (x, y) coordinates.
   2949   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2950                     Int32Regs:$v2, Int32Regs:$v3),
   2951               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2952               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" presumably selects the blue component (confirm against the PTX ISA).
   2953               "[$t, $s, \\{$x, $y\\}];",
   2954               []>;  // Empty pattern list.
   2955 def TLD4_A_2D_U32_F32  // tld4.a.2d with separate $t and $s operands: u32 results $v0..$v3, f32 (x, y) coordinates.
   2956   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2957                     Int32Regs:$v2, Int32Regs:$v3),
   2958               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2959               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" presumably selects the alpha component (confirm against the PTX ISA).
   2960               "[$t, $s, \\{$x, $y\\}];",
   2961               []>;  // Empty pattern list.
   2962 }
   2963 
   2964 
   2965 // texmode_unified
   2966 let IsTex = 1, IsTexModeUnified = 1 in {
   2967 // Texture fetch instructions using handles
   2968 def TEX_UNIFIED_1D_F32_S32  // Unified-mode tex.1d addressed by $t alone (no $s operand): f32 results, s32 coordinate.
   2969   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2970                     Float32Regs:$b, Float32Regs:$a),
   2971               (ins Int64Regs:$t, Int32Regs:$x),
   2972               "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   2973               []>;  // Empty pattern list.
   2974 def TEX_UNIFIED_1D_F32_F32  // Unified-mode tex.1d addressed by $t alone (no $s operand): f32 results, f32 coordinate.
   2975   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2976                     Float32Regs:$b, Float32Regs:$a),
   2977               (ins Int64Regs:$t, Float32Regs:$x),
   2978               "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   2979               []>;  // Empty pattern list.
   2980 def TEX_UNIFIED_1D_F32_F32_LEVEL  // Unified-mode tex.level.1d addressed by $t alone: f32 results, f32 coordinate.
   2981   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2982                     Float32Regs:$b, Float32Regs:$a),
   2983               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),  // $lod is printed after the bracketed address.
   2984               "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2985               "[$t, \\{$x\\}], $lod;",
   2986               []>;  // Empty pattern list.
   2987 def TEX_UNIFIED_1D_F32_F32_GRAD  // Unified-mode tex.grad.1d addressed by $t alone: f32 results, f32 coordinate.
   2988   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2989                     Float32Regs:$b, Float32Regs:$a),
   2990               (ins Int64Regs:$t, Float32Regs:$x,
   2991                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   2992               "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   2993               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2994               []>;  // Empty pattern list.
   2995 def TEX_UNIFIED_1D_S32_S32  // Unified-mode tex.1d addressed by $t alone (no $s operand): s32 results, s32 coordinate.
   2996   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2997                     Int32Regs:$b, Int32Regs:$a),
   2998               (ins Int64Regs:$t, Int32Regs:$x),
   2999               "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   3000               []>;  // Empty pattern list.
   3001 def TEX_UNIFIED_1D_S32_F32  // Unified-mode tex.1d addressed by $t alone (no $s operand): s32 results, f32 coordinate.
   3002   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3003                     Int32Regs:$b, Int32Regs:$a),
   3004               (ins Int64Regs:$t, Float32Regs:$x),
   3005               "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   3006               []>;  // Empty pattern list.
   3007 def TEX_UNIFIED_1D_S32_F32_LEVEL  // Unified-mode tex.level.1d addressed by $t alone: s32 results, f32 coordinate.
   3008   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3009                     Int32Regs:$b, Int32Regs:$a),
   3010               (ins Int64Regs:$t, Float32Regs:$x,
   3011                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3012               "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3013               "[$t, \\{$x\\}], $lod;",
   3014               []>;  // Empty pattern list.
   3015 def TEX_UNIFIED_1D_S32_F32_GRAD  // Unified-mode tex.grad.1d addressed by $t alone: s32 results, f32 coordinate.
   3016   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3017                     Int32Regs:$b, Int32Regs:$a),
   3018               (ins Int64Regs:$t, Float32Regs:$x,
   3019                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   3020               "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3021               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   3022               []>;  // Empty pattern list.
   3023 def TEX_UNIFIED_1D_U32_S32  // Unified-mode tex.1d addressed by $t alone (no $s operand): u32 results, s32 coordinate.
   3024   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3025                     Int32Regs:$b, Int32Regs:$a),
   3026               (ins Int64Regs:$t, Int32Regs:$x),
   3027               "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   3028               []>;  // Empty pattern list.
   3029 def TEX_UNIFIED_1D_U32_F32  // Unified-mode tex.1d addressed by $t alone (no $s operand): u32 results, f32 coordinate.
   3030   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3031                     Int32Regs:$b, Int32Regs:$a),
   3032               (ins Int64Regs:$t, Float32Regs:$x),
   3033               "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",  // The single coordinate is still printed as a brace vector.
   3034               []>;  // Empty pattern list.
   3035 def TEX_UNIFIED_1D_U32_F32_LEVEL  // Unified-mode tex.level.1d addressed by $t alone: u32 results, f32 coordinate.
   3036   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3037                     Int32Regs:$b, Int32Regs:$a),
   3038               (ins Int64Regs:$t, Float32Regs:$x,
   3039                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3040               "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3041               "[$t, \\{$x\\}], $lod;",
   3042               []>;  // Empty pattern list.
   3043 def TEX_UNIFIED_1D_U32_F32_GRAD  // Unified-mode tex.grad.1d addressed by $t alone: u32 results, f32 coordinate.
   3044   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3045                     Int32Regs:$b, Int32Regs:$a),
   3046               (ins Int64Regs:$t, Float32Regs:$x,
   3047                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   3048               "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3049               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   3050               []>;  // Empty pattern list.
   3051 
   3052 def TEX_UNIFIED_1D_ARRAY_F32_S32  // Unified-mode tex.a1d addressed by $t alone: f32 results, s32 coordinate.
   3053   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3054                     Float32Regs:$b, Float32Regs:$a),
   3055               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3056               "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   3057               "[$t, \\{$l, $x\\}];",
   3058               []>;  // Empty pattern list.
   3059 def TEX_UNIFIED_1D_ARRAY_F32_F32  // Unified-mode tex.a1d addressed by $t alone: f32 results, f32 coordinate.
   3060   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3061                     Float32Regs:$b, Float32Regs:$a),
   3062               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),  // $l stays a 32-bit integer even though $x is f32; it is printed ahead of $x.
   3063               "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3064               "[$t, \\{$l, $x\\}];",
   3065               []>;  // Empty pattern list.
   3066 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL  // Unified-mode tex.level.a1d addressed by $t alone: f32 results, f32 coordinate.
   3067   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3068                     Float32Regs:$b, Float32Regs:$a),
   3069               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3070                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3071               "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3072               "[$t, \\{$l, $x\\}], $lod;",
   3073               []>;  // Empty pattern list.
   3074 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD  // Unified-mode tex.grad.a1d addressed by $t alone: f32 results, f32 coordinate.
   3075   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3076                     Float32Regs:$b, Float32Regs:$a),
   3077               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3078                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   3079               "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3080               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   3081               []>;  // Empty pattern list.
   3082 def TEX_UNIFIED_1D_ARRAY_S32_S32  // Unified-mode tex.a1d addressed by $t alone: s32 results, s32 coordinate.
   3083   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3084                     Int32Regs:$b, Int32Regs:$a),
   3085               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3086               "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   3087               "[$t, \\{$l, $x\\}];",
   3088               []>;  // Empty pattern list.
   3089 def TEX_UNIFIED_1D_ARRAY_S32_F32  // Unified-mode tex.a1d addressed by $t alone: s32 results, f32 coordinate.
   3090   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3091                     Int32Regs:$b, Int32Regs:$a),
   3092               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),  // $l stays a 32-bit integer even though $x is f32; it is printed ahead of $x.
   3093               "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3094               "[$t, \\{$l, $x\\}];",
   3095               []>;  // Empty pattern list.
   3096 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL  // Unified-mode tex.level.a1d addressed by $t alone: s32 results, f32 coordinate.
   3097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3098                     Int32Regs:$b, Int32Regs:$a),
   3099               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3100                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3101               "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3102               "[$t, \\{$l, $x\\}], $lod;",
   3103               []>;  // Empty pattern list.
   3104 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD  // Unified-mode tex.grad.a1d addressed by $t alone: s32 results, f32 coordinate.
   3105   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3106                     Int32Regs:$b, Int32Regs:$a),
   3107               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3108                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   3109               "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3110               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   3111               []>;  // Empty pattern list.
   3112 def TEX_UNIFIED_1D_ARRAY_U32_S32  // Unified-mode tex.a1d addressed by $t alone: u32 results, s32 coordinate.
   3113   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3114                     Int32Regs:$b, Int32Regs:$a),
   3115               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3116               "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   3117               "[$t, \\{$l, $x\\}];",
   3118               []>;  // Empty pattern list.
   3119 def TEX_UNIFIED_1D_ARRAY_U32_F32  // Unified-mode tex.a1d addressed by $t alone: u32 results, f32 coordinate.
   3120   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3121                     Int32Regs:$b, Int32Regs:$a),
   3122               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),  // $l stays a 32-bit integer even though $x is f32; it is printed ahead of $x.
   3123               "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3124               "[$t, \\{$l, $x\\}];",
   3125               []>;  // Empty pattern list.
   3126 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL  // Unified-mode tex.level.a1d addressed by $t alone: u32 results, f32 coordinate.
   3127   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3128                     Int32Regs:$b, Int32Regs:$a),
   3129               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3130                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3131               "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3132               "[$t, \\{$l, $x\\}], $lod;",
   3133               []>;  // Empty pattern list.
   3134 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD  // Unified-mode tex.grad.a1d addressed by $t alone: u32 results, f32 coordinate.
   3135   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3136                     Int32Regs:$b, Int32Regs:$a),
   3137               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed ahead of $x in the coordinate vector (presumably the array layer; confirm).
   3138                    Float32Regs:$gradx, Float32Regs:$grady),  // Each gradient is printed as its own one-element brace vector after the address.
   3139               "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3140               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   3141               []>;  // Empty pattern list.
   3142 
   3143 def TEX_UNIFIED_2D_F32_S32  // Unified-mode tex.2d addressed by $t alone (no $s operand): f32 results, s32 (x, y) coordinates.
   3144   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3145                     Float32Regs:$b, Float32Regs:$a),
   3146               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   3147               "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   3148               "[$t, \\{$x, $y\\}];",
   3149               []>;  // Empty pattern list.
   3150 def TEX_UNIFIED_2D_F32_F32  // Unified-mode tex.2d addressed by $t alone (no $s operand): f32 results, f32 (x, y) coordinates.
   3151   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3152                     Float32Regs:$b, Float32Regs:$a),
   3153               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3154               "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3155               "[$t, \\{$x, $y\\}];",
   3156               []>;  // Empty pattern list.
   3157 def TEX_UNIFIED_2D_F32_F32_LEVEL  // Unified-mode tex.level.2d addressed by $t alone: f32 results, f32 (x, y) coordinates.
   3158   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3159                     Float32Regs:$b, Float32Regs:$a),
   3160               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3161                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3162               "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3163               "[$t, \\{$x, $y\\}], $lod;",
   3164               []>;  // Empty pattern list.
   3165 def TEX_UNIFIED_2D_F32_F32_GRAD  // Unified-mode tex.grad.2d addressed by $t alone: f32 results, f32 (x, y) coordinates.
   3166   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3167                     Float32Regs:$b, Float32Regs:$a),
   3168               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3169                    Float32Regs:$gradx0, Float32Regs:$gradx1,  // Printed as the first two-element brace vector after the address.
   3170                    Float32Regs:$grady0, Float32Regs:$grady1),  // Printed as the second two-element brace vector.
   3171               "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3172               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   3173               "\\{$grady0, $grady1\\};",
   3174               []>;  // Empty pattern list.
   3175 def TEX_UNIFIED_2D_S32_S32  // Unified-mode tex.2d addressed by $t alone (no $s operand): s32 results, s32 (x, y) coordinates.
   3176   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3177                     Int32Regs:$b, Int32Regs:$a),
   3178               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   3179               "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   3180               "[$t, \\{$x, $y\\}];",
   3181               []>;  // Empty pattern list.
   3182 def TEX_UNIFIED_2D_S32_F32  // Unified-mode tex.2d addressed by $t alone (no $s operand): s32 results, f32 (x, y) coordinates.
   3183   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3184                     Int32Regs:$b, Int32Regs:$a),
   3185               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3186               "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3187               "[$t, \\{$x, $y\\}];",
   3188               []>;  // Empty pattern list.
   3189 def TEX_UNIFIED_2D_S32_F32_LEVEL  // Unified-mode tex.level.2d addressed by $t alone: s32 results, f32 (x, y) coordinates.
   3190   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3191                     Int32Regs:$b, Int32Regs:$a),
   3192               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3193                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3194               "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3195               "[$t, \\{$x, $y\\}], $lod;",
   3196               []>;  // Empty pattern list.
   3197 def TEX_UNIFIED_2D_S32_F32_GRAD  // Unified-mode tex.grad.2d addressed by $t alone: s32 results, f32 (x, y) coordinates.
   3198   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3199                     Int32Regs:$b, Int32Regs:$a),
   3200               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3201                    Float32Regs:$gradx0, Float32Regs:$gradx1,  // Printed as the first two-element brace vector after the address.
   3202                    Float32Regs:$grady0, Float32Regs:$grady1),  // Printed as the second two-element brace vector.
   3203               "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3204               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   3205               "\\{$grady0, $grady1\\};",
   3206               []>;  // Empty pattern list.
   3207 def TEX_UNIFIED_2D_U32_S32  // Unified-mode tex.2d addressed by $t alone (no $s operand): u32 results, s32 (x, y) coordinates.
   3208   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3209                     Int32Regs:$b, Int32Regs:$a),
   3210               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   3211               "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   3212               "[$t, \\{$x, $y\\}];",
   3213               []>;  // Empty pattern list.
   3214 def TEX_UNIFIED_2D_U32_F32  // Unified-mode tex.2d addressed by $t alone (no $s operand): u32 results, f32 (x, y) coordinates.
   3215   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3216                     Int32Regs:$b, Int32Regs:$a),
   3217               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3218               "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3219               "[$t, \\{$x, $y\\}];",
   3220               []>;  // Empty pattern list.
   3221 def TEX_UNIFIED_2D_U32_F32_LEVEL  // Unified-mode tex.level.2d addressed by $t alone: u32 results, f32 (x, y) coordinates.
   3222   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3223                     Int32Regs:$b, Int32Regs:$a),
   3224               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3225                    Float32Regs:$lod),  // $lod is printed after the bracketed address.
   3226               "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3227               "[$t, \\{$x, $y\\}], $lod;",
   3228               []>;  // Empty pattern list.
   3229 def TEX_UNIFIED_2D_U32_F32_GRAD  // Unified-mode tex.grad.2d addressed by $t alone: u32 results, f32 (x, y) coordinates.
   3230   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3231                     Int32Regs:$b, Int32Regs:$a),
   3232               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3233                    Float32Regs:$gradx0, Float32Regs:$gradx1,  // Printed as the first two-element brace vector after the address.
   3234                    Float32Regs:$grady0, Float32Regs:$grady1),  // Printed as the second two-element brace vector.
   3235               "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3236               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   3237               "\\{$grady0, $grady1\\};",
   3238               []>;  // Empty pattern list.
   3239 
   3240 def TEX_UNIFIED_2D_ARRAY_F32_S32  // Unified-mode tex.a2d addressed by $t alone: f32 results, s32 (x, y) coordinates.
   3241   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3242                     Float32Regs:$b, Float32Regs:$a),
   3243               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3244                    Int32Regs:$y),
   3245               "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   3246               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3247               []>;  // Empty pattern list.
   3248 def TEX_UNIFIED_2D_ARRAY_F32_F32  // Unified-mode tex.a2d addressed by $t alone: f32 results, f32 (x, y) coordinates.
   3249   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3250                     Float32Regs:$b, Float32Regs:$a),
   3251               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3252                    Float32Regs:$y),
   3253               "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3254               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3255               []>;  // Empty pattern list.
   3256 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL  // Unified-mode tex.level.a2d addressed by $t alone: f32 results, f32 (x, y) coordinates.
   3257   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3258                     Float32Regs:$b, Float32Regs:$a),
   3259               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3260                    Float32Regs:$y, Float32Regs:$lod),
   3261               "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3262               "[$t, \\{$l, $x, $y, $y\\}], $lod;",  // $y is printed twice in the four-element vector; $lod follows the address.
   3263               []>;  // Empty pattern list.
   3264 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD  // Unified-mode tex.grad.a2d addressed by $t alone: f32 results, f32 (x, y) coordinates.
   3265   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3266                     Float32Regs:$b, Float32Regs:$a),
   3267               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3268                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
   3269                    Float32Regs:$grady0, Float32Regs:$grady1),
   3270               "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3271               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "  // $y is printed twice; the gradients are two-element vectors.
   3272               "\\{$grady0, $grady1\\};",
   3273               []>;  // Empty pattern list.
   3274 def TEX_UNIFIED_2D_ARRAY_S32_S32  // Unified-mode tex.a2d addressed by $t alone: s32 results, s32 (x, y) coordinates.
   3275   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3276                     Int32Regs:$b, Int32Regs:$a),
   3277               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3278                    Int32Regs:$y),
   3279               "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   3280               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3281               []>;  // Empty pattern list.
   3282 def TEX_UNIFIED_2D_ARRAY_S32_F32  // Unified-mode tex.a2d addressed by $t alone: s32 results, f32 (x, y) coordinates.
   3283   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3284                     Int32Regs:$b, Int32Regs:$a),
   3285               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3286                    Float32Regs:$y),
   3287               "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3288               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3289               []>;  // Empty pattern list.
   3290 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL  // Unified-mode tex.level.a2d addressed by $t alone: s32 results, f32 (x, y) coordinates.
   3291   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3292                     Int32Regs:$b, Int32Regs:$a),
   3293               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3294                    Float32Regs:$y, Float32Regs:$lod),
   3295               "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3296               "[$t, \\{$l, $x, $y, $y\\}], $lod;",  // $y is printed twice in the four-element vector; $lod follows the address.
   3297               []>;  // Empty pattern list.
   3298 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD  // Unified-mode tex.grad.a2d addressed by $t alone: s32 results, f32 (x, y) coordinates.
   3299   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3300                     Int32Regs:$b, Int32Regs:$a),
   3301               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3302                    Float32Regs:$y,
   3303                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3304                    Float32Regs:$grady0, Float32Regs:$grady1),
   3305               "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3306               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "  // $y is printed twice; the gradients are two-element vectors.
   3307               "\\{$grady0, $grady1\\};",
   3308               []>;  // Empty pattern list.
   3309 def TEX_UNIFIED_2D_ARRAY_U32_S32  // Unified-mode tex.a2d addressed by $t alone: u32 results, s32 (x, y) coordinates.
   3310   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3311                     Int32Regs:$b, Int32Regs:$a),
   3312               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3313                    Int32Regs:$y),
   3314               "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   3315               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3316               []>;  // Empty pattern list.
   3317 def TEX_UNIFIED_2D_ARRAY_U32_F32  // Unified-mode tex.a2d addressed by $t alone: u32 results, f32 (x, y) coordinates.
   3318   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3319                     Int32Regs:$b, Int32Regs:$a),
   3320               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3321                    Float32Regs:$y),
   3322               "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3323               "[$t, \\{$l, $x, $y, $y\\}];",  // $y is printed twice, making a four-element vector.
   3324               []>;  // Empty pattern list.
   3325 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL  // Unified-mode tex.level.a2d addressed by $t alone: u32 results, f32 (x, y) coordinates.
   3326   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3327                     Int32Regs:$b, Int32Regs:$a),
   3328               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3329                    Float32Regs:$y, Float32Regs:$lod),
   3330               "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3331               "[$t, \\{$l, $x, $y, $y\\}], $lod;",  // $y is printed twice in the four-element vector; $lod follows the address.
   3332               []>;  // Empty pattern list.
   3333 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD  // Unified-mode tex.grad.a2d addressed by $t alone: u32 results, f32 (x, y) coordinates.
   3334   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3335                     Int32Regs:$b, Int32Regs:$a),
   3336               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,  // $l is printed first in the coordinate vector (presumably the array layer; confirm).
   3337                    Float32Regs:$y,
   3338                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3339                    Float32Regs:$grady0, Float32Regs:$grady1),
   3340               "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3341               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "  // $y is printed twice; the gradients are two-element vectors.
   3342               "\\{$grady0, $grady1\\};",
   3343               []>;  // Empty pattern list.
   3344 
   3345 def TEX_UNIFIED_3D_F32_S32  // Unified-mode tex.3d addressed by $t alone (no $s operand): f32 results, s32 (x, y, z) coordinates.
   3346   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3347                     Float32Regs:$b, Float32Regs:$a),
   3348               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3349                    Int32Regs:$z),
   3350               "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
   3351               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3352               []>;  // Empty pattern list.
   3353 def TEX_UNIFIED_3D_F32_F32  // Unified-mode tex.3d addressed by $t alone (no $s operand): f32 results, f32 (x, y, z) coordinates.
   3354   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3355                     Float32Regs:$b, Float32Regs:$a),
   3356               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3357                    Float32Regs:$z),
   3358               "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3359               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3360               []>;  // Empty pattern list.
   3361 def TEX_UNIFIED_3D_F32_F32_LEVEL  // Unified-mode tex.level.3d addressed by $t alone: f32 results, f32 (x, y, z) coordinates.
   3362   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3363                     Float32Regs:$b, Float32Regs:$a),
   3364               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3365                    Float32Regs:$z, Float32Regs:$lod),
   3366               "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3367               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice in the four-element vector; $lod follows the address.
   3368               []>;  // Empty pattern list.
   3369 def TEX_UNIFIED_3D_F32_F32_GRAD  // Unified-mode tex.grad.3d addressed by $t alone: f32 results, f32 (x, y, z) coordinates.
   3370   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3371                     Float32Regs:$b, Float32Regs:$a),
   3372               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3373                    Float32Regs:$z,
   3374                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3375                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3376                    Float32Regs:$grady1, Float32Regs:$grady2),
   3377               "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3378               "[$t, \\{$x, $y, $z, $z\\}], "  // $z is printed twice, making a four-element vector.
   3379               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "  // $gradx2 is likewise printed twice.
   3380               "\\{$grady0, $grady1, $grady2, $grady2\\};",  // $grady2 is likewise printed twice.
   3381               []>;  // Empty pattern list.
   3382 def TEX_UNIFIED_3D_S32_S32  // Unified-mode tex.3d addressed by $t alone (no $s operand): s32 results, s32 (x, y, z) coordinates.
   3383   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3384                     Int32Regs:$b, Int32Regs:$a),
   3385               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3386                    Int32Regs:$z),
   3387               "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
   3388               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3389               []>;  // Empty pattern list.
   3390 def TEX_UNIFIED_3D_S32_F32  // Unified-mode tex.3d addressed by $t alone (no $s operand): s32 results, f32 (x, y, z) coordinates.
   3391   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3392                     Int32Regs:$b, Int32Regs:$a),
   3393               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3394                    Float32Regs:$z),
   3395               "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3396               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3397               []>;  // Empty pattern list.
   3398 def TEX_UNIFIED_3D_S32_F32_LEVEL  // Unified-mode tex.level.3d addressed by $t alone: s32 results, f32 (x, y, z) coordinates.
   3399   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3400                     Int32Regs:$b, Int32Regs:$a),
   3401               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3402                    Float32Regs:$z, Float32Regs:$lod),
   3403               "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3404               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice in the four-element vector; $lod follows the address.
   3405               []>;  // Empty pattern list.
   3406 def TEX_UNIFIED_3D_S32_F32_GRAD  // Unified-mode tex.grad.3d addressed by $t alone: s32 results, f32 (x, y, z) coordinates.
   3407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3408                     Int32Regs:$b, Int32Regs:$a),
   3409               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3410                    Float32Regs:$z,
   3411                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3412                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3413                    Float32Regs:$grady1, Float32Regs:$grady2),
   3414               "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3415               "[$t, \\{$x, $y, $z, $z\\}], "  // $z is printed twice, making a four-element vector.
   3416               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "  // $gradx2 is likewise printed twice.
   3417               "\\{$grady0, $grady1, $grady2, $grady2\\};",  // $grady2 is likewise printed twice.
   3418               []>;  // Empty pattern list.
   3419 def TEX_UNIFIED_3D_U32_S32  // Unified-mode tex.3d addressed by $t alone (no $s operand): u32 results, s32 (x, y, z) coordinates.
   3420   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3421                     Int32Regs:$b, Int32Regs:$a),
   3422               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3423                    Int32Regs:$z),
   3424               "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
   3425               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3426               []>;  // Empty pattern list.
   3427 def TEX_UNIFIED_3D_U32_F32  // Unified-mode tex.3d addressed by $t alone (no $s operand): u32 results, f32 (x, y, z) coordinates.
   3428   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3429                     Int32Regs:$b, Int32Regs:$a),
   3430               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3431                    Float32Regs:$z),
   3432               "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3433               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3434               []>;  // Empty pattern list.
   3435 def TEX_UNIFIED_3D_U32_F32_LEVEL  // Unified-mode tex.level.3d addressed by $t alone: u32 results, f32 (x, y, z) coordinates.
   3436   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3437                     Int32Regs:$b, Int32Regs:$a),
   3438               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3439                    Float32Regs:$z, Float32Regs:$lod),
   3440               "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3441               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice in the four-element vector; $lod follows the address.
   3442               []>;  // Empty pattern list.
   3443 def TEX_UNIFIED_3D_U32_F32_GRAD  // Unified-mode tex.grad.3d addressed by $t alone: u32 results, f32 (x, y, z) coordinates.
   3444   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3445                     Int32Regs:$b, Int32Regs:$a),
   3446               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3447                    Float32Regs:$z,
   3448                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3449                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3450                    Float32Regs:$grady1, Float32Regs:$grady2),
   3451               "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3452               "[$t, \\{$x, $y, $z, $z\\}], "  // $z is printed twice, making a four-element vector.
   3453               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "  // $gradx2 is likewise printed twice.
   3454               "\\{$grady0, $grady1, $grady2, $grady2\\};",  // $grady2 is likewise printed twice.
   3455               []>;  // Empty pattern list.
   3456 
   3457 def TEX_UNIFIED_CUBE_F32_F32  // Unified-mode tex.cube addressed by $t alone (no $s operand): f32 results, f32 (x, y, z) coordinates.
   3458   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3459                     Float32Regs:$b, Float32Regs:$a),
   3460               (ins Int64Regs:$t,
   3461                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3462               "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3463               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3464               []>;  // Empty pattern list.
   3465 def TEX_UNIFIED_CUBE_F32_F32_LEVEL  // Unified-mode tex.level.cube addressed by $t alone: f32 results, f32 (x, y, z) coordinates.
   3466   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3467                     Float32Regs:$b, Float32Regs:$a),
   3468               (ins Int64Regs:$t,
   3469                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3470                    Float32Regs:$lod),
   3471               "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3472               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice in the four-element vector; $lod follows the address.
   3473               []>;  // Empty pattern list.
   3474 def TEX_UNIFIED_CUBE_S32_F32  // Unified-mode tex.cube addressed by $t alone (no $s operand): s32 results, f32 (x, y, z) coordinates.
   3475   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3476                     Int32Regs:$b, Int32Regs:$a),
   3477               (ins Int64Regs:$t,
   3478                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3479               "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3480               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice, making a four-element vector.
   3481               []>;  // Empty pattern list.
   3482 def TEX_UNIFIED_CUBE_S32_F32_LEVEL  // Unified-mode tex.level.cube addressed by $t alone: s32 results, f32 (x, y, z) coordinates.
   3483   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3484                     Int32Regs:$b, Int32Regs:$a),
   3485               (ins Int64Regs:$t,
   3486                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3487                    Float32Regs:$lod),
   3488               "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3489               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice in the four-element vector; $lod follows the address.
   3490               []>;  // Empty pattern list.
   3491 def TEX_UNIFIED_CUBE_U32_F32  // tex.cube with .u32 results: same Int32Regs outputs as the S32 form; only the mnemonic type differs.
   3492   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3493                     Int32Regs:$b, Int32Regs:$a),
   3494               (ins Int64Regs:$t,
   3495                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3496               "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3497               "[$t, \\{$x, $y, $z, $z\\}];",  // $z is printed twice: three operands, four-element vector.
   3498               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3499 def TEX_UNIFIED_CUBE_U32_F32_LEVEL  // tex.level.cube with .u32 results: four Int32Regs outputs, f32 coords plus f32 $lod.
   3500   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3501                     Int32Regs:$b, Int32Regs:$a),
   3502               (ins Int64Regs:$t,
   3503                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3504                    Float32Regs:$lod),
   3505               "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3506               "[$t, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice; $lod trails the address operand.
   3507               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3508 
   3509 def TEX_UNIFIED_CUBE_ARRAY_F32_F32  // tex.acube: four f32 results; takes Int32Regs $l (presumably the array layer) plus f32 coords.
   3510   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3511                     Float32Regs:$b, Float32Regs:$a),
   3512               (ins Int64Regs:$t, Int32Regs:$l,
   3513                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3514               "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3515               "[$t, \\{$l, $x, $y, $z\\}];",  // $l leads the vector; with it the four slots are filled, so $z is not repeated.
   3516               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3517 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL  // tex.level.acube: four f32 results; Int32Regs $l, f32 coords, and an f32 $lod operand.
   3518   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3519                     Float32Regs:$b, Float32Regs:$a),
   3520               (ins Int64Regs:$t, Int32Regs:$l,
   3521                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3522                    Float32Regs:$lod),
   3523               "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
   3524               "[$t, \\{$l, $x, $y, $z\\}], $lod;",  // $l leads the coordinate vector; $lod trails the address operand.
   3525               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3526 def TEX_UNIFIED_CUBE_ARRAY_S32_F32  // tex.acube with .s32 results: four Int32Regs outputs; Int32Regs $l plus f32 coords.
   3527   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3528                     Int32Regs:$b, Int32Regs:$a),
   3529               (ins Int64Regs:$t, Int32Regs:$l,
   3530                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3531               "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3532               "[$t, \\{$l, $x, $y, $z\\}];",  // $l leads the vector; all four slots are distinct operands.
   3533               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3534 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL  // tex.level.acube with .s32 results: Int32Regs outputs; $l, f32 coords, f32 $lod.
   3535   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3536                     Int32Regs:$b, Int32Regs:$a),
   3537               (ins Int64Regs:$t, Int32Regs:$l,
   3538                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3539                    Float32Regs:$lod),
   3540               "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
   3541               "[$t, \\{$l, $x, $y, $z\\}], $lod;",  // $l leads the coordinate vector; $lod trails the address operand.
   3542               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3543 def TEX_UNIFIED_CUBE_ARRAY_U32_F32  // tex.acube with .u32 results: four Int32Regs outputs; Int32Regs $l plus f32 coords.
   3544   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3545                     Int32Regs:$b, Int32Regs:$a),
   3546               (ins Int64Regs:$t, Int32Regs:$l,
   3547                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3548               "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3549               "[$t, \\{$l, $x, $y, $z\\}];",  // $l leads the vector; all four slots are distinct operands.
   3550               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3551 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL  // tex.level.acube with .u32 results: Int32Regs outputs; $l, f32 coords, f32 $lod.
   3552   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3553                     Int32Regs:$b, Int32Regs:$a),
   3554               (ins Int64Regs:$t, Int32Regs:$l,
   3555                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3556                    Float32Regs:$lod),
   3557               "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
   3558               "[$t, \\{$l, $x, $y, $z\\}], $lod;",  // $l leads the coordinate vector; $lod trails the address operand.
   3559               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3560 
   3561 def TLD4_UNIFIED_R_2D_F32_F32  // tld4.r.2d: four f32 results $v0-$v3 from 64-bit operand $t at f32 coords (x, y).
   3562   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3563                     Float32Regs:$v2, Float32Regs:$v3),
   3564               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3565               "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".r" is the only part that differs from the G/B/A siblings.
   3566               "[$t, \\{$x, $y\\}];",
   3567               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3568 def TLD4_UNIFIED_G_2D_F32_F32  // tld4.g.2d: four f32 results $v0-$v3 from 64-bit operand $t at f32 coords (x, y).
   3569   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3570                     Float32Regs:$v2, Float32Regs:$v3),
   3571               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3572               "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".g" is the only part that differs from the R/B/A siblings.
   3573               "[$t, \\{$x, $y\\}];",
   3574               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3575 def TLD4_UNIFIED_B_2D_F32_F32  // tld4.b.2d: four f32 results $v0-$v3 from 64-bit operand $t at f32 coords (x, y).
   3576   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3577                     Float32Regs:$v2, Float32Regs:$v3),
   3578               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3579               "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" is the only part that differs from the R/G/A siblings.
   3580               "[$t, \\{$x, $y\\}];",
   3581               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3582 def TLD4_UNIFIED_A_2D_F32_F32  // tld4.a.2d: four f32 results $v0-$v3 from 64-bit operand $t at f32 coords (x, y).
   3583   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3584                     Float32Regs:$v2, Float32Regs:$v3),
   3585               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3586               "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" is the only part that differs from the R/G/B siblings.
   3587               "[$t, \\{$x, $y\\}];",
   3588               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3589 def TLD4_UNIFIED_R_2D_S32_F32  // tld4.r.2d with .s32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3590   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3591                     Int32Regs:$v2, Int32Regs:$v3),
   3592               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3593               "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".r" is the only part that differs from the G/B/A siblings.
   3594               "[$t, \\{$x, $y\\}];",
   3595               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3596 def TLD4_UNIFIED_G_2D_S32_F32  // tld4.g.2d with .s32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3597   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3598                     Int32Regs:$v2, Int32Regs:$v3),
   3599               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3600               "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".g" is the only part that differs from the R/B/A siblings.
   3601               "[$t, \\{$x, $y\\}];",
   3602               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3603 def TLD4_UNIFIED_B_2D_S32_F32  // tld4.b.2d with .s32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3604   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3605                     Int32Regs:$v2, Int32Regs:$v3),
   3606               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3607               "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" is the only part that differs from the R/G/A siblings.
   3608               "[$t, \\{$x, $y\\}];",
   3609               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3610 def TLD4_UNIFIED_A_2D_S32_F32  // tld4.a.2d with .s32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3611   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3612                     Int32Regs:$v2, Int32Regs:$v3),
   3613               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3614               "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" is the only part that differs from the R/G/B siblings.
   3615               "[$t, \\{$x, $y\\}];",
   3616               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3617 def TLD4_UNIFIED_R_2D_U32_F32  // tld4.r.2d with .u32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3618   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3619                     Int32Regs:$v2, Int32Regs:$v3),
   3620               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3621               "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".r" is the only part that differs from the G/B/A siblings.
   3622               "[$t, \\{$x, $y\\}];",
   3623               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3624 def TLD4_UNIFIED_G_2D_U32_F32  // tld4.g.2d with .u32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3625   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3626                     Int32Regs:$v2, Int32Regs:$v3),
   3627               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3628               "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".g" is the only part that differs from the R/B/A siblings.
   3629               "[$t, \\{$x, $y\\}];",
   3630               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3631 def TLD4_UNIFIED_B_2D_U32_F32  // tld4.b.2d with .u32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3632   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3633                     Int32Regs:$v2, Int32Regs:$v3),
   3634               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3635               "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".b" is the only part that differs from the R/G/A siblings.
   3636               "[$t, \\{$x, $y\\}];",
   3637               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3638 def TLD4_UNIFIED_A_2D_U32_F32  // tld4.a.2d with .u32 results: four Int32Regs outputs $v0-$v3, f32 coords (x, y).
   3639   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3640                     Int32Regs:$v2, Int32Regs:$v3),
   3641               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3642               "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "  // ".a" is the only part that differs from the R/G/B siblings.
   3643               "[$t, \\{$x, $y\\}];",
   3644               []>;  // Empty pattern list: no selection pattern is attached to this def.
   3645 }
   3646 
   3647 
   3648 
   3649 //=== Surface load instructions
   3650 // .clamp variant
   3651 let IsSuld = 1 in {  // Single-result suld.b ... .clamp defs: each has one output $r and carries IsSuld = 1.
   3652 def SULD_1D_I8_CLAMP  // 1d: address is [$s, {$x}]. The b8 result is placed in Int16Regs.
   3653   : NVPTXInst<(outs Int16Regs:$r),
   3654               (ins Int64Regs:$s, Int32Regs:$x),
   3655               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3656               []>;  // Empty pattern list here and on every def in this block.
   3657 def SULD_1D_I16_CLAMP
   3658   : NVPTXInst<(outs Int16Regs:$r),
   3659               (ins Int64Regs:$s, Int32Regs:$x),
   3660               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3661               []>;
   3662 def SULD_1D_I32_CLAMP
   3663   : NVPTXInst<(outs Int32Regs:$r),
   3664               (ins Int64Regs:$s, Int32Regs:$x),
   3665               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3666               []>;
   3667 def SULD_1D_I64_CLAMP
   3668   : NVPTXInst<(outs Int64Regs:$r),
   3669               (ins Int64Regs:$s, Int32Regs:$x),
   3670               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3671               []>;
   3672 
   3673 def SULD_1D_ARRAY_I8_CLAMP  // a1d: Int32Regs $l comes before $x in the coordinate vector.
   3674   : NVPTXInst<(outs Int16Regs:$r),
   3675               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3676               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3677               []>;
   3678 def SULD_1D_ARRAY_I16_CLAMP
   3679   : NVPTXInst<(outs Int16Regs:$r),
   3680               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3681               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3682               []>;
   3683 def SULD_1D_ARRAY_I32_CLAMP
   3684   : NVPTXInst<(outs Int32Regs:$r),
   3685               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3686               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3687               []>;
   3688 def SULD_1D_ARRAY_I64_CLAMP
   3689   : NVPTXInst<(outs Int64Regs:$r),
   3690               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3691               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3692               []>;
   3693 
   3694 def SULD_2D_I8_CLAMP  // 2d: coordinate vector is {$x, $y}.
   3695   : NVPTXInst<(outs Int16Regs:$r),
   3696               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3697               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3698               []>;
   3699 def SULD_2D_I16_CLAMP
   3700   : NVPTXInst<(outs Int16Regs:$r),
   3701               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3702               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3703               []>;
   3704 def SULD_2D_I32_CLAMP
   3705   : NVPTXInst<(outs Int32Regs:$r),
   3706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3707               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3708               []>;
   3709 def SULD_2D_I64_CLAMP
   3710   : NVPTXInst<(outs Int64Regs:$r),
   3711               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3712               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3713               []>;
   3714 
   3715 def SULD_2D_ARRAY_I8_CLAMP  // a2d: {$l, $x, $y, $y}; $y is printed twice, giving a four-element vector.
   3716   : NVPTXInst<(outs Int16Regs:$r),
   3717               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3718               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3719               []>;
   3720 def SULD_2D_ARRAY_I16_CLAMP
   3721   : NVPTXInst<(outs Int16Regs:$r),
   3722               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3723               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3724               []>;
   3725 def SULD_2D_ARRAY_I32_CLAMP
   3726   : NVPTXInst<(outs Int32Regs:$r),
   3727               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3728               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3729               []>;
   3730 def SULD_2D_ARRAY_I64_CLAMP
   3731   : NVPTXInst<(outs Int64Regs:$r),
   3732               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3733               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3734               []>;
   3735 
   3736 def SULD_3D_I8_CLAMP  // 3d: {$x, $y, $z, $z}; $z is printed twice, giving a four-element vector.
   3737   : NVPTXInst<(outs Int16Regs:$r),
   3738               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3739               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3740               []>;
   3741 def SULD_3D_I16_CLAMP
   3742   : NVPTXInst<(outs Int16Regs:$r),
   3743               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3744               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3745               []>;
   3746 def SULD_3D_I32_CLAMP
   3747   : NVPTXInst<(outs Int32Regs:$r),
   3748               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3749               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3750               []>;
   3751 def SULD_3D_I64_CLAMP
   3752   : NVPTXInst<(outs Int64Regs:$r),
   3753               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3754               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3755               []>;
   3756 }  // end let IsSuld = 1 (.clamp, single result)
   3757 
   3758 let IsSuld = 2 in {  // Two-result (.v2) suld.b ... .clamp defs: each has outputs $r, $g and carries IsSuld = 2.
   3759 def SULD_1D_V2I8_CLAMP  // 1d: address is [$s, {$x}]. The b8 results are placed in Int16Regs.
   3760   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3761               (ins Int64Regs:$s, Int32Regs:$x),
   3762               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3763               []>;  // Empty pattern list here and on every def in this block.
   3764 def SULD_1D_V2I16_CLAMP
   3765   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3766               (ins Int64Regs:$s, Int32Regs:$x),
   3767               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3768               []>;
   3769 def SULD_1D_V2I32_CLAMP
   3770   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3771               (ins Int64Regs:$s, Int32Regs:$x),
   3772               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3773               []>;
   3774 def SULD_1D_V2I64_CLAMP
   3775   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3776               (ins Int64Regs:$s, Int32Regs:$x),
   3777               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3778               []>;
   3779 
   3780 def SULD_1D_ARRAY_V2I8_CLAMP  // a1d: Int32Regs $l comes before $x in the coordinate vector.
   3781   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3782               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3783               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3784               []>;
   3785 def SULD_1D_ARRAY_V2I16_CLAMP
   3786   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3787               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3788               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3789               []>;
   3790 def SULD_1D_ARRAY_V2I32_CLAMP
   3791   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3792               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3793               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3794               []>;
   3795 def SULD_1D_ARRAY_V2I64_CLAMP
   3796   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3797               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3798               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3799               []>;
   3800 
   3801 def SULD_2D_V2I8_CLAMP  // 2d: coordinate vector is {$x, $y}.
   3802   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3803               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3804               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3805               []>;
   3806 def SULD_2D_V2I16_CLAMP
   3807   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3808               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3809               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3810               []>;
   3811 def SULD_2D_V2I32_CLAMP
   3812   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3813               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3814               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3815               []>;
   3816 def SULD_2D_V2I64_CLAMP
   3817   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3818               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3819               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3820               []>;
   3821 
   3822 def SULD_2D_ARRAY_V2I8_CLAMP  // a2d: {$l, $x, $y, $y}; $y is printed twice, giving a four-element vector.
   3823   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3824               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3825               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
   3826               "[$s, \\{$l, $x, $y, $y\\}];",
   3827               []>;
   3828 def SULD_2D_ARRAY_V2I16_CLAMP
   3829   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3830               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3831               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
   3832               "[$s, \\{$l, $x, $y, $y\\}];",
   3833               []>;
   3834 def SULD_2D_ARRAY_V2I32_CLAMP
   3835   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3836               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3837               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
   3838               "[$s, \\{$l, $x, $y, $y\\}];",
   3839               []>;
   3840 def SULD_2D_ARRAY_V2I64_CLAMP
   3841   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3842               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3843               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
   3844               "[$s, \\{$l, $x, $y, $y\\}];",
   3845               []>;
   3846 
   3847 def SULD_3D_V2I8_CLAMP  // 3d: {$x, $y, $z, $z}; $z is printed twice, giving a four-element vector.
   3848   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3849               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3850               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3851               []>;
   3852 def SULD_3D_V2I16_CLAMP
   3853   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3854               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3855               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3856               []>;
   3857 def SULD_3D_V2I32_CLAMP
   3858   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3859               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3860               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3861               []>;
   3862 def SULD_3D_V2I64_CLAMP
   3863   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3864               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3865               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3866               []>;
   3867 }  // end let IsSuld = 2 (.clamp, two results)
   3868 
   3869 let IsSuld = 3 in {  // Four-result (.v4) suld.b ... .clamp defs: outputs $r,$g,$b,$a; IsSuld is 3 here. No b64 forms in this block.
   3870 def SULD_1D_V4I8_CLAMP  // 1d: address is [$s, {$x}]. The b8 results are placed in Int16Regs.
   3871   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3872               (ins Int64Regs:$s, Int32Regs:$x),
   3873               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3874               []>;  // Empty pattern list here and on every def in this block.
   3875 def SULD_1D_V4I16_CLAMP
   3876   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3877               (ins Int64Regs:$s, Int32Regs:$x),
   3878               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3879               []>;
   3880 def SULD_1D_V4I32_CLAMP
   3881   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3882               (ins Int64Regs:$s, Int32Regs:$x),
   3883               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3884               []>;
   3885 
   3886 def SULD_1D_ARRAY_V4I8_CLAMP  // a1d: Int32Regs $l comes before $x in the coordinate vector.
   3887   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3888               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3889               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3890               "[$s, \\{$l, $x\\}];",
   3891               []>;
   3892 def SULD_1D_ARRAY_V4I16_CLAMP
   3893   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3894               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3895               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3896               "[$s, \\{$l, $x\\}];",
   3897               []>;
   3898 def SULD_1D_ARRAY_V4I32_CLAMP
   3899   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3900               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3901               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3902               "[$s, \\{$l, $x\\}];",
   3903               []>;
   3904 
   3905 def SULD_2D_V4I8_CLAMP  // 2d: coordinate vector is {$x, $y}.
   3906   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3907               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3908               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3909               []>;
   3910 def SULD_2D_V4I16_CLAMP
   3911   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3912               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3913               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3914               []>;
   3915 def SULD_2D_V4I32_CLAMP
   3916   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3918               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3919               []>;
   3920 
   3921 def SULD_2D_ARRAY_V4I8_CLAMP  // a2d: {$l, $x, $y, $y}; $y is printed twice, giving a four-element vector.
   3922   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3923               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3924               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3925               "[$s, \\{$l, $x, $y, $y\\}];",
   3926               []>;
   3927 def SULD_2D_ARRAY_V4I16_CLAMP
   3928   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3929               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3930               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3931               "[$s, \\{$l, $x, $y, $y\\}];",
   3932               []>;
   3933 def SULD_2D_ARRAY_V4I32_CLAMP
   3934   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3935               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3936               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3937               "[$s, \\{$l, $x, $y, $y\\}];",
   3938               []>;
   3939 
   3940 
   3941 def SULD_3D_V4I8_CLAMP  // 3d: {$x, $y, $z, $z}; $z is printed twice, giving a four-element vector.
   3942   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3943               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3944               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3945               "[$s, \\{$x, $y, $z, $z\\}];",
   3946               []>;
   3947 def SULD_3D_V4I16_CLAMP
   3948   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3949               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3950               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3951               "[$s, \\{$x, $y, $z, $z\\}];",
   3952               []>;
   3953 def SULD_3D_V4I32_CLAMP
   3954   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3955               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3956               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3957               "[$s, \\{$x, $y, $z, $z\\}];",
   3958               []>;
   3959 }  // end let IsSuld = 3 (.clamp, four results)
   3960 
   3961 
   3962 // .trap variant
   3963 let IsSuld = 1 in {  // Single-result suld.b ... .trap defs: each has one output $r and carries IsSuld = 1.
   3964 def SULD_1D_I8_TRAP  // 1d: address is [$s, {$x}]. The b8 result is placed in Int16Regs.
   3965   : NVPTXInst<(outs Int16Regs:$r),
   3966               (ins Int64Regs:$s, Int32Regs:$x),
   3967               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
   3968               []>;  // Empty pattern list here and on every def in this block.
   3969 def SULD_1D_I16_TRAP
   3970   : NVPTXInst<(outs Int16Regs:$r),
   3971               (ins Int64Regs:$s, Int32Regs:$x),
   3972               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
   3973               []>;
   3974 def SULD_1D_I32_TRAP
   3975   : NVPTXInst<(outs Int32Regs:$r),
   3976               (ins Int64Regs:$s, Int32Regs:$x),
   3977               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
   3978               []>;
   3979 def SULD_1D_I64_TRAP
   3980   : NVPTXInst<(outs Int64Regs:$r),
   3981               (ins Int64Regs:$s, Int32Regs:$x),
   3982               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
   3983               []>;
   3984 
   3985 def SULD_1D_ARRAY_I8_TRAP  // a1d: Int32Regs $l comes before $x in the coordinate vector.
   3986   : NVPTXInst<(outs Int16Regs:$r),
   3987               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3988               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3989               []>;
   3990 def SULD_1D_ARRAY_I16_TRAP
   3991   : NVPTXInst<(outs Int16Regs:$r),
   3992               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3993               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3994               []>;
   3995 def SULD_1D_ARRAY_I32_TRAP
   3996   : NVPTXInst<(outs Int32Regs:$r),
   3997               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3998               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3999               []>;
   4000 def SULD_1D_ARRAY_I64_TRAP
   4001   : NVPTXInst<(outs Int64Regs:$r),
   4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4003               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   4004               []>;
   4005 
   4006 def SULD_2D_I8_TRAP  // 2d: coordinate vector is {$x, $y}.
   4007   : NVPTXInst<(outs Int16Regs:$r),
   4008               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4009               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   4010               []>;
   4011 def SULD_2D_I16_TRAP
   4012   : NVPTXInst<(outs Int16Regs:$r),
   4013               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4014               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   4015               []>;
   4016 def SULD_2D_I32_TRAP
   4017   : NVPTXInst<(outs Int32Regs:$r),
   4018               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4019               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   4020               []>;
   4021 def SULD_2D_I64_TRAP
   4022   : NVPTXInst<(outs Int64Regs:$r),
   4023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4024               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   4025               []>;
   4026 
   4027 def SULD_2D_ARRAY_I8_TRAP  // a2d: {$l, $x, $y, $y}; $y is printed twice, giving a four-element vector.
   4028   : NVPTXInst<(outs Int16Regs:$r),
   4029               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4030               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4031               []>;
   4032 def SULD_2D_ARRAY_I16_TRAP
   4033   : NVPTXInst<(outs Int16Regs:$r),
   4034               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4035               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4036               []>;
   4037 def SULD_2D_ARRAY_I32_TRAP
   4038   : NVPTXInst<(outs Int32Regs:$r),
   4039               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4040               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4041               []>;
   4042 def SULD_2D_ARRAY_I64_TRAP
   4043   : NVPTXInst<(outs Int64Regs:$r),
   4044               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4045               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4046               []>;
   4047 
   4048 def SULD_3D_I8_TRAP  // 3d: {$x, $y, $z, $z}; $z is printed twice, giving a four-element vector.
   4049   : NVPTXInst<(outs Int16Regs:$r),
   4050               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4051               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4052               []>;
   4053 def SULD_3D_I16_TRAP
   4054   : NVPTXInst<(outs Int16Regs:$r),
   4055               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4056               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4057               []>;
   4058 def SULD_3D_I32_TRAP
   4059   : NVPTXInst<(outs Int32Regs:$r),
   4060               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4061               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4062               []>;
   4063 def SULD_3D_I64_TRAP
   4064   : NVPTXInst<(outs Int64Regs:$r),
   4065               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4066               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4067               []>;
   4068 }  // end let IsSuld = 1 (.trap, single result)
   4069 
   4070 let IsSuld = 2 in {
   4071 def SULD_1D_V2I8_TRAP  // suld.b.1d.v2.b8.trap: two b8 results held in Int16Regs; sits under the enclosing `let IsSuld = 2`.
   4072   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4073               (ins Int64Regs:$s, Int32Regs:$x),
   4074               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   4075               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4076 def SULD_1D_V2I16_TRAP  // suld.b.1d.v2.b16.trap: two Int16Regs results from [$s, {$x}].
   4077   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4078               (ins Int64Regs:$s, Int32Regs:$x),
   4079               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   4080               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4081 def SULD_1D_V2I32_TRAP  // suld.b.1d.v2.b32.trap: two Int32Regs results from [$s, {$x}].
   4082   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4083               (ins Int64Regs:$s, Int32Regs:$x),
   4084               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   4085               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4086 def SULD_1D_V2I64_TRAP  // suld.b.1d.v2.b64.trap: two Int64Regs results from [$s, {$x}].
   4087   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4088               (ins Int64Regs:$s, Int32Regs:$x),
   4089               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   4090               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4091 
   4092 def SULD_1D_ARRAY_V2I8_TRAP  // suld.b.a1d.v2.b8.trap: two b8 results held in Int16Regs; $l comes before $x in the coordinate vector.
   4093   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4094               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4095               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4096               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4097 def SULD_1D_ARRAY_V2I16_TRAP  // suld.b.a1d.v2.b16.trap: two Int16Regs results; $l comes before $x in the coordinate vector.
   4098   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4099               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4100               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4101               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4102 def SULD_1D_ARRAY_V2I32_TRAP  // suld.b.a1d.v2.b32.trap: two Int32Regs results; $l comes before $x in the coordinate vector.
   4103   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4104               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4105               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4106               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4107 def SULD_1D_ARRAY_V2I64_TRAP  // suld.b.a1d.v2.b64.trap: two Int64Regs results; $l comes before $x in the coordinate vector.
   4108   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4110               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4111               []>;  // Empty pattern list: no selection pattern is attached to this def.
   4112 
   4113 def SULD_2D_V2I8_TRAP  // suld.b.2d.v2.b8.trap: two b8 results held in Int16Regs; coordinate vector is {$x, $y}.
   4114   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4115               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4116               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4117               []>;  // Empty pattern list: no selection pattern is attached to this def.
   // 2D surface load, two 16-bit channels (.v2.b16), .trap variant.
   // Results $r/$g are Int16Regs. Inputs: surface operand $s (Int64Regs) and
   // coordinates $x, $y (Int32Regs). Empty pattern list: no selection pattern
   // is attached to this record here.
   4118 def SULD_2D_V2I16_TRAP
   4119   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4120               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4121               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4122               []>;
   // 2D surface load, two 32-bit channels (.v2.b32), .trap variant.
   // Results $r/$g are Int32Regs. Inputs: surface operand $s (Int64Regs) and
   // coordinates $x, $y (Int32Regs). Empty pattern list: no selection pattern
   // is attached to this record here.
   4123 def SULD_2D_V2I32_TRAP
   4124   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4125               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4126               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4127               []>;
   // 2D surface load, two 64-bit channels (.v2.b64), .trap variant.
   // Results $r/$g are Int64Regs. Inputs: surface operand $s (Int64Regs) and
   // coordinates $x, $y (Int32Regs). Empty pattern list: no selection pattern
   // is attached to this record here.
   4128 def SULD_2D_V2I64_TRAP
   4129   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4130               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4131               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4132               []>;
   4133 
   // 2D-array (a2d) surface load, two 8-bit channels (.v2.b8), .trap variant.
   // The 8-bit results $r/$g are returned in Int16Regs. Inputs: surface operand
   // $s, then $l, $x, $y. The asm string is built from two adjacent string
   // literals and prints $y twice, giving a four-element coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector here and ignores the
   // last element, and $l is the array layer -- confirm against the PTX ISA.
   4134 def SULD_2D_ARRAY_V2I8_TRAP
   4135   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4136               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4137               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
   4138               "[$s, \\{$l, $x, $y, $y\\}];",
   4139               []>;
   // 2D-array (a2d) surface load, two 16-bit channels (.v2.b16), .trap variant.
   // Results $r/$g are Int16Regs. Inputs: surface operand $s, then $l, $x, $y.
   // The asm string is built from two adjacent string literals and prints $y
   // twice, giving a four-element coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector here and ignores the
   // last element, and $l is the array layer -- confirm against the PTX ISA.
   4140 def SULD_2D_ARRAY_V2I16_TRAP
   4141   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4142               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4143               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
   4144               "[$s, \\{$l, $x, $y, $y\\}];",
   4145               []>;
   // 2D-array (a2d) surface load, two 32-bit channels (.v2.b32), .trap variant.
   // Results $r/$g are Int32Regs. Inputs: surface operand $s, then $l, $x, $y.
   // The asm string is built from two adjacent string literals and prints $y
   // twice, giving a four-element coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector here and ignores the
   // last element, and $l is the array layer -- confirm against the PTX ISA.
   4146 def SULD_2D_ARRAY_V2I32_TRAP
   4147   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4148               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4149               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
   4150               "[$s, \\{$l, $x, $y, $y\\}];",
   4151               []>;
   // 2D-array (a2d) surface load, two 64-bit channels (.v2.b64), .trap variant.
   // Results $r/$g are Int64Regs. Inputs: surface operand $s, then $l, $x, $y.
   // The asm string is built from two adjacent string literals and prints $y
   // twice, giving a four-element coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector here and ignores the
   // last element, and $l is the array layer -- confirm against the PTX ISA.
   4152 def SULD_2D_ARRAY_V2I64_TRAP
   4153   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4154               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4155               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
   4156               "[$s, \\{$l, $x, $y, $y\\}];",
   4157               []>;
   4158 
   // 3D surface load, two 8-bit channels (.v2.b8), .trap variant.
   // The 8-bit results $r/$g are returned in Int16Regs. Inputs: surface operand
   // $s and coordinates $x, $y, $z. The asm string prints $z twice, giving a
   // four-element coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector for 3d and ignores
   // the last element -- confirm against the PTX ISA.
   4159 def SULD_3D_V2I8_TRAP
   4160   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4161               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4162               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4163               []>;
   // 3D surface load, two 16-bit channels (.v2.b16), .trap variant.
   // Results $r/$g are Int16Regs. Inputs: surface operand $s and coordinates
   // $x, $y, $z. The asm string prints $z twice, giving a four-element
   // coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector for 3d and ignores
   // the last element -- confirm against the PTX ISA.
   4164 def SULD_3D_V2I16_TRAP
   4165   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4166               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4167               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4168               []>;
   // 3D surface load, two 32-bit channels (.v2.b32), .trap variant.
   // Results $r/$g are Int32Regs. Inputs: surface operand $s and coordinates
   // $x, $y, $z. The asm string prints $z twice, giving a four-element
   // coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector for 3d and ignores
   // the last element -- confirm against the PTX ISA.
   4169 def SULD_3D_V2I32_TRAP
   4170   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4171               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4172               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4173               []>;
   // 3D surface load, two 64-bit channels (.v2.b64), .trap variant.
   // Results $r/$g are Int64Regs. Inputs: surface operand $s and coordinates
   // $x, $y, $z. The asm string prints $z twice, giving a four-element
   // coordinate list.
   // NOTE(review): presumably PTX wants a 4-element vector for 3d and ignores
   // the last element -- confirm against the PTX ISA.
   4174 def SULD_3D_V2I64_TRAP
   4175   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4176               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4177               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4178               []>;
   4179 }
   4180 
   // Four-channel (.v4) surface loads, .trap variant.
   //
   // Every record in this block gets IsSuld = 3. Each one produces four results
   // ($r, $g, $b, $a) and takes the surface operand $s (Int64Regs) followed by
   // Int32Regs coordinates. Only b8/b16/b32 element sizes are defined here;
   // 8-bit results are returned in Int16Regs, like the 16-bit ones.
   //
   // Geometries covered: 1d {$x}, a1d {$l, $x}, 2d {$x, $y}, a2d {$l, $x, $y, $y}
   // and 3d {$x, $y, $z, $z}. The a2d and 3d asm strings print the last
   // coordinate twice so the list has four elements.
   // NOTE(review): presumably PTX takes a 4-element coordinate vector for those
   // geometries and ignores the last element, and $l is the array layer index
   // -- confirm against the PTX ISA.
   //
   // All pattern lists are empty, so no selection pattern is attached to these
   // records here.
   4181 let IsSuld = 3 in {
   4182 def SULD_1D_V4I8_TRAP
   4183   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4184               (ins Int64Regs:$s, Int32Regs:$x),
   4185               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4186               []>;
   4187 def SULD_1D_V4I16_TRAP
   4188   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4189               (ins Int64Regs:$s, Int32Regs:$x),
   4190               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4191               []>;
   4192 def SULD_1D_V4I32_TRAP
   4193   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4194               (ins Int64Regs:$s, Int32Regs:$x),
   4195               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4196               []>;
   4197 
   4198 def SULD_1D_ARRAY_V4I8_TRAP
   4199   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4200               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4201               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   4202               "[$s, \\{$l, $x\\}];",
   4203               []>;
   4204 def SULD_1D_ARRAY_V4I16_TRAP
   4205   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4206               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4207               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   4208               "[$s, \\{$l, $x\\}];",
   4209               []>;
   4210 def SULD_1D_ARRAY_V4I32_TRAP
   4211   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4212               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4213               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   4214               "[$s, \\{$l, $x\\}];",
   4215               []>;
   4216 
   4217 def SULD_2D_V4I8_TRAP
   4218   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4219               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4220               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4221               []>;
   4222 def SULD_2D_V4I16_TRAP
   4223   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4224               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4225               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4226               []>;
   4227 def SULD_2D_V4I32_TRAP
   4228   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4229               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4230               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4231               []>;
   4232 
   4233 def SULD_2D_ARRAY_V4I8_TRAP
   4234   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4235               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4236               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   4237               "[$s, \\{$l, $x, $y, $y\\}];",
   4238               []>;
   4239 def SULD_2D_ARRAY_V4I16_TRAP
   4240   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4241               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4242               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   4243               "[$s, \\{$l, $x, $y, $y\\}];",
   4244               []>;
   4245 def SULD_2D_ARRAY_V4I32_TRAP
   4246   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4247               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4248               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   4249               "[$s, \\{$l, $x, $y, $y\\}];",
   4250               []>;
   4251 
   4252 
   4253 def SULD_3D_V4I8_TRAP
   4254   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4255               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4256               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   4257               "[$s, \\{$x, $y, $z, $z\\}];",
   4258               []>;
   4259 def SULD_3D_V4I16_TRAP
   4260   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4261               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4262               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   4263               "[$s, \\{$x, $y, $z, $z\\}];",
   4264               []>;
   4265 def SULD_3D_V4I32_TRAP
   4266   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4267               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4268               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   4269               "[$s, \\{$x, $y, $z, $z\\}];",
   4270               []>;
   4271 }
   4272 
   4273 // .zero variant
   // Single-channel surface loads, .zero variant.
   //
   // Every record in this block gets IsSuld = 1. Each one produces one result
   // $r and takes the surface operand $s (Int64Regs) followed by Int32Regs
   // coordinates. Element sizes b8/b16/b32/b64 are defined; 8-bit results are
   // returned in Int16Regs, like the 16-bit ones.
   //
   // Geometries covered: 1d {$x}, a1d {$l, $x}, 2d {$x, $y}, a2d {$l, $x, $y, $y}
   // and 3d {$x, $y, $z, $z}. The a2d and 3d asm strings print the last
   // coordinate twice so the list has four elements.
   // NOTE(review): presumably PTX takes a 4-element coordinate vector for those
   // geometries and ignores the last element, and $l is the array layer index
   // -- confirm against the PTX ISA.
   //
   // All pattern lists are empty, so no selection pattern is attached to these
   // records here.
   4274 let IsSuld = 1 in {
   4275 def SULD_1D_I8_ZERO
   4276   : NVPTXInst<(outs Int16Regs:$r),
   4277               (ins Int64Regs:$s, Int32Regs:$x),
   4278               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
   4279               []>;
   4280 def SULD_1D_I16_ZERO
   4281   : NVPTXInst<(outs Int16Regs:$r),
   4282               (ins Int64Regs:$s, Int32Regs:$x),
   4283               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
   4284               []>;
   4285 def SULD_1D_I32_ZERO
   4286   : NVPTXInst<(outs Int32Regs:$r),
   4287               (ins Int64Regs:$s, Int32Regs:$x),
   4288               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
   4289               []>;
   4290 def SULD_1D_I64_ZERO
   4291   : NVPTXInst<(outs Int64Regs:$r),
   4292               (ins Int64Regs:$s, Int32Regs:$x),
   4293               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
   4294               []>;
   4295 
   4296 def SULD_1D_ARRAY_I8_ZERO
   4297   : NVPTXInst<(outs Int16Regs:$r),
   4298               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4299               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   4300               []>;
   4301 def SULD_1D_ARRAY_I16_ZERO
   4302   : NVPTXInst<(outs Int16Regs:$r),
   4303               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4304               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   4305               []>;
   4306 def SULD_1D_ARRAY_I32_ZERO
   4307   : NVPTXInst<(outs Int32Regs:$r),
   4308               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4309               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   4310               []>;
   4311 def SULD_1D_ARRAY_I64_ZERO
   4312   : NVPTXInst<(outs Int64Regs:$r),
   4313               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4314               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   4315               []>;
   4316 
   4317 def SULD_2D_I8_ZERO
   4318   : NVPTXInst<(outs Int16Regs:$r),
   4319               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4320               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   4321               []>;
   4322 def SULD_2D_I16_ZERO
   4323   : NVPTXInst<(outs Int16Regs:$r),
   4324               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4325               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   4326               []>;
   4327 def SULD_2D_I32_ZERO
   4328   : NVPTXInst<(outs Int32Regs:$r),
   4329               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4330               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   4331               []>;
   4332 def SULD_2D_I64_ZERO
   4333   : NVPTXInst<(outs Int64Regs:$r),
   4334               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4335               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   4336               []>;
   4337 
   4338 def SULD_2D_ARRAY_I8_ZERO
   4339   : NVPTXInst<(outs Int16Regs:$r),
   4340               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4341               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4342               []>;
   4343 def SULD_2D_ARRAY_I16_ZERO
   4344   : NVPTXInst<(outs Int16Regs:$r),
   4345               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4346               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4347               []>;
   4348 def SULD_2D_ARRAY_I32_ZERO
   4349   : NVPTXInst<(outs Int32Regs:$r),
   4350               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4351               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4352               []>;
   4353 def SULD_2D_ARRAY_I64_ZERO
   4354   : NVPTXInst<(outs Int64Regs:$r),
   4355               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4356               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4357               []>;
   4358 
   4359 def SULD_3D_I8_ZERO
   4360   : NVPTXInst<(outs Int16Regs:$r),
   4361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4362               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4363               []>;
   4364 def SULD_3D_I16_ZERO
   4365   : NVPTXInst<(outs Int16Regs:$r),
   4366               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4367               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4368               []>;
   4369 def SULD_3D_I32_ZERO
   4370   : NVPTXInst<(outs Int32Regs:$r),
   4371               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4372               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4373               []>;
   4374 def SULD_3D_I64_ZERO
   4375   : NVPTXInst<(outs Int64Regs:$r),
   4376               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4377               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4378               []>;
   4379 }
   4380 
   // Two-channel (.v2) surface loads, .zero variant.
   //
   // Every record in this block gets IsSuld = 2. Each one produces two results
   // ($r, $g) and takes the surface operand $s (Int64Regs) followed by
   // Int32Regs coordinates. Element sizes b8/b16/b32/b64 are defined; 8-bit
   // results are returned in Int16Regs, like the 16-bit ones.
   //
   // Geometries covered: 1d {$x}, a1d {$l, $x}, 2d {$x, $y}, a2d {$l, $x, $y, $y}
   // and 3d {$x, $y, $z, $z}. The a2d and 3d asm strings print the last
   // coordinate twice so the list has four elements.
   // NOTE(review): presumably PTX takes a 4-element coordinate vector for those
   // geometries and ignores the last element, and $l is the array layer index
   // -- confirm against the PTX ISA.
   //
   // All pattern lists are empty, so no selection pattern is attached to these
   // records here.
   4381 let IsSuld = 2 in {
   4382 def SULD_1D_V2I8_ZERO
   4383   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4384               (ins Int64Regs:$s, Int32Regs:$x),
   4385               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4386               []>;
   4387 def SULD_1D_V2I16_ZERO
   4388   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4389               (ins Int64Regs:$s, Int32Regs:$x),
   4390               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4391               []>;
   4392 def SULD_1D_V2I32_ZERO
   4393   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4394               (ins Int64Regs:$s, Int32Regs:$x),
   4395               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4396               []>;
   4397 def SULD_1D_V2I64_ZERO
   4398   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4399               (ins Int64Regs:$s, Int32Regs:$x),
   4400               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4401               []>;
   4402 
   4403 def SULD_1D_ARRAY_V2I8_ZERO
   4404   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4405               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4406               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4407               []>;
   4408 def SULD_1D_ARRAY_V2I16_ZERO
   4409   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4410               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4411               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4412               []>;
   4413 def SULD_1D_ARRAY_V2I32_ZERO
   4414   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4415               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4416               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4417               []>;
   4418 def SULD_1D_ARRAY_V2I64_ZERO
   4419   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4420               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4421               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4422               []>;
   4423 
   4424 def SULD_2D_V2I8_ZERO
   4425   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4426               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4427               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4428               []>;
   4429 def SULD_2D_V2I16_ZERO
   4430   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4431               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4432               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4433               []>;
   4434 def SULD_2D_V2I32_ZERO
   4435   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4436               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4437               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4438               []>;
   4439 def SULD_2D_V2I64_ZERO
   4440   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4441               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4442               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4443               []>;
   4444 
   4445 def SULD_2D_ARRAY_V2I8_ZERO
   4446   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4447               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4448               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
   4449               "[$s, \\{$l, $x, $y, $y\\}];",
   4450               []>;
   4451 def SULD_2D_ARRAY_V2I16_ZERO
   4452   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4453               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4454               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
   4455               "[$s, \\{$l, $x, $y, $y\\}];",
   4456               []>;
   4457 def SULD_2D_ARRAY_V2I32_ZERO
   4458   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4459               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4460               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
   4461               "[$s, \\{$l, $x, $y, $y\\}];",
   4462               []>;
   4463 def SULD_2D_ARRAY_V2I64_ZERO
   4464   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4465               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4466               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
   4467               "[$s, \\{$l, $x, $y, $y\\}];",
   4468               []>;
   4469 
   4470 def SULD_3D_V2I8_ZERO
   4471   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4472               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4473               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4474               []>;
   4475 def SULD_3D_V2I16_ZERO
   4476   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4477               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4478               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4479               []>;
   4480 def SULD_3D_V2I32_ZERO
   4481   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4482               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4483               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4484               []>;
   4485 def SULD_3D_V2I64_ZERO
   4486   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4487               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4488               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4489               []>;
   4490 }
   4491 
   // Four-channel (.v4) surface loads, .zero variant.
   //
   // Every record in this block gets IsSuld = 3. Each one produces four results
   // ($r, $g, $b, $a) and takes the surface operand $s (Int64Regs) followed by
   // Int32Regs coordinates. Only b8/b16/b32 element sizes are defined here;
   // 8-bit results are returned in Int16Regs, like the 16-bit ones.
   //
   // Geometries covered: 1d {$x}, a1d {$l, $x}, 2d {$x, $y}, a2d {$l, $x, $y, $y}
   // and 3d {$x, $y, $z, $z}. The a2d and 3d asm strings print the last
   // coordinate twice so the list has four elements.
   // NOTE(review): presumably PTX takes a 4-element coordinate vector for those
   // geometries and ignores the last element, and $l is the array layer index
   // -- confirm against the PTX ISA.
   //
   // All pattern lists are empty, so no selection pattern is attached to these
   // records here.
   4492 let IsSuld = 3 in {
   4493 def SULD_1D_V4I8_ZERO
   4494   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4495               (ins Int64Regs:$s, Int32Regs:$x),
   4496               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4497               []>;
   4498 def SULD_1D_V4I16_ZERO
   4499   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4500               (ins Int64Regs:$s, Int32Regs:$x),
   4501               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4502               []>;
   4503 def SULD_1D_V4I32_ZERO
   4504   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4505               (ins Int64Regs:$s, Int32Regs:$x),
   4506               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4507               []>;
   4508 
   4509 def SULD_1D_ARRAY_V4I8_ZERO
   4510   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4511               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4512               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4513               "[$s, \\{$l, $x\\}];",
   4514               []>;
   4515 def SULD_1D_ARRAY_V4I16_ZERO
   4516   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4517               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4518               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4519               "[$s, \\{$l, $x\\}];",
   4520               []>;
   4521 def SULD_1D_ARRAY_V4I32_ZERO
   4522   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4523               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4524               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4525               "[$s, \\{$l, $x\\}];",
   4526               []>;
   4527 
   4528 def SULD_2D_V4I8_ZERO
   4529   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4530               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4531               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4532               []>;
   4533 def SULD_2D_V4I16_ZERO
   4534   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4535               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4536               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4537               []>;
   4538 def SULD_2D_V4I32_ZERO
   4539   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4540               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4541               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4542               []>;
   4543 
   4544 def SULD_2D_ARRAY_V4I8_ZERO
   4545   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4546               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4547               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4548               "[$s, \\{$l, $x, $y, $y\\}];",
   4549               []>;
   4550 def SULD_2D_ARRAY_V4I16_ZERO
   4551   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4552               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4553               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4554               "[$s, \\{$l, $x, $y, $y\\}];",
   4555               []>;
   4556 def SULD_2D_ARRAY_V4I32_ZERO
   4557   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4558               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4559               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4560               "[$s, \\{$l, $x, $y, $y\\}];",
   4561               []>;
   4562 
   4563 
   4564 def SULD_3D_V4I8_ZERO
   4565   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4566               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4567               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4568               "[$s, \\{$x, $y, $z, $z\\}];",
   4569               []>;
   4570 def SULD_3D_V4I16_ZERO
   4571   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4572               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4573               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4574               "[$s, \\{$x, $y, $z, $z\\}];",
   4575               []>;
   4576 def SULD_3D_V4I32_ZERO
   4577   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4578               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4579               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4580               "[$s, \\{$x, $y, $z, $z\\}];",
   4581               []>;
   4582 }
   4583 
   4584 //-----------------------------------
   4585 // Texture Query Intrinsics
   4586 //-----------------------------------
   4587 
   // Texture query instructions (txq.<attribute>.b32).
   //
   // Every record in this block gets IsSurfTexQuery = 1. Each instruction takes
   // one Int64Regs operand $a, printed as the bracketed address [$a], and
   // writes one Int32Regs result $d. The queried attribute is the middle part
   // of the mnemonic: channel_order, channel_data_type, width, height, depth,
   // array_size, num_samples or num_mipmap_levels.
   //
   // The pattern lists are empty; the int_nvvm_txq_* intrinsics are mapped onto
   // these records by the separate anonymous Pat records in this file.
   4588 let IsSurfTexQuery = 1 in {
   4589 def TXQ_CHANNEL_ORDER
   4590   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4591               "txq.channel_order.b32 \t$d, [$a];",
   4592               []>;
   4593 def TXQ_CHANNEL_DATA_TYPE
   4594   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4595               "txq.channel_data_type.b32 \t$d, [$a];",
   4596               []>;
   4597 def TXQ_WIDTH
   4598   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4599               "txq.width.b32 \t$d, [$a];",
   4600               []>;
   4601 def TXQ_HEIGHT
   4602   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4603               "txq.height.b32 \t$d, [$a];",
   4604               []>;
   4605 def TXQ_DEPTH
   4606   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4607               "txq.depth.b32 \t$d, [$a];",
   4608               []>;
   4609 def TXQ_ARRAY_SIZE
   4610   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4611               "txq.array_size.b32 \t$d, [$a];",
   4612               []>;
   4613 def TXQ_NUM_SAMPLES
   4614   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4615               "txq.num_samples.b32 \t$d, [$a];",
   4616               []>;
   4617 def TXQ_NUM_MIPMAP_LEVELS
   4618   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4619               "txq.num_mipmap_levels.b32 \t$d, [$a];",
   4620               []>;
   4621 }
   4622 
   // Selection patterns for the texture query intrinsics: each int_nvvm_txq_*
   // intrinsic applied to an Int64Regs operand $a is rewritten to the TXQ_*
   // instruction with the matching attribute name, passing $a through unchanged.
   4623 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
   4624           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
   4625 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
   4626           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
   4627 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
   4628           (TXQ_WIDTH Int64Regs:$a)>;
   4629 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
   4630           (TXQ_HEIGHT Int64Regs:$a)>;
   4631 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
   4632           (TXQ_DEPTH Int64Regs:$a)>;
   4633 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
   4634           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
   4635 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
   4636           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
   4637 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
   4638           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
   4639 
   4640 
   4641 //-----------------------------------
   4642 // Surface Query Intrinsics
   4643 //-----------------------------------
   4644 
   // Surface query instructions (suq.<attribute>.b32).
   //
   // Every record in this block gets IsSurfTexQuery = 1. Each instruction takes
   // one Int64Regs operand $a, printed as the bracketed address [$a], and
   // writes one Int32Regs result $d. The queried attribute is the middle part
   // of the mnemonic: channel_order, channel_data_type, width, height, depth or
   // array_size.
   //
   // The pattern lists are empty; the int_nvvm_suq_* intrinsics are mapped onto
   // these records by the separate anonymous Pat records in this file.
   4645 let IsSurfTexQuery = 1 in {
   4646 def SUQ_CHANNEL_ORDER
   4647   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4648               "suq.channel_order.b32 \t$d, [$a];",
   4649               []>;
   4650 def SUQ_CHANNEL_DATA_TYPE
   4651   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4652               "suq.channel_data_type.b32 \t$d, [$a];",
   4653               []>;
   4654 def SUQ_WIDTH
   4655   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4656               "suq.width.b32 \t$d, [$a];",
   4657               []>;
   4658 def SUQ_HEIGHT
   4659   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4660               "suq.height.b32 \t$d, [$a];",
   4661               []>;
   4662 def SUQ_DEPTH
   4663   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4664               "suq.depth.b32 \t$d, [$a];",
   4665               []>;
   4666 def SUQ_ARRAY_SIZE
   4667   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4668               "suq.array_size.b32 \t$d, [$a];",
   4669               []>;
   4670 }
   4671 
   // Selection patterns for the surface query intrinsics: each int_nvvm_suq_*
   // intrinsic applied to an Int64Regs operand $a is rewritten to the SUQ_*
   // instruction with the matching attribute name, passing $a through unchanged.
   4672 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
   4673           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
   4674 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
   4675           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
   4676 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
   4677           (SUQ_WIDTH Int64Regs:$a)>;
   4678 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
   4679           (SUQ_HEIGHT Int64Regs:$a)>;
   4680 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
   4681           (SUQ_DEPTH Int64Regs:$a)>;
   4682 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
   4683           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
   4684 
   4685 
   4686 //===- Handle Query -------------------------------------------------------===//
   4687 
   4688 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
   // istypep.samplerref: writes a predicate $d (Int1Regs) computed from the
   // 64-bit handle $a. Unlike the txq/suq records, $a is printed bare (no
   // brackets) and the selection pattern is attached inline: $d is set to the
   // result of int_nvvm_istypep_sampler applied to $a.
   4689 def ISTYPEP_SAMPLER
   4690   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4691               "istypep.samplerref \t$d, $a;",
   4692               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
   // istypep.surfref: writes a predicate $d (Int1Regs) computed from the 64-bit
   // handle $a. $a is printed bare (no brackets) and the selection pattern is
   // attached inline: $d is set to the result of int_nvvm_istypep_surface
   // applied to $a.
   4693 def ISTYPEP_SURFACE
   4694   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4695               "istypep.surfref \t$d, $a;",
   4696               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
   // istypep.texref: writes a predicate $d (Int1Regs) computed from the 64-bit
   // handle $a. $a is printed bare (no brackets) and the selection pattern is
   // attached inline: $d is set to the result of int_nvvm_istypep_texture
   // applied to $a.
   4697 def ISTYPEP_TEXTURE
   4698   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4699               "istypep.texref \t$d, $a;",
   4700               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
   4701 
   4702 //===- Surface Stores -----------------------------------------------------===//
   4703 
   4704 let IsSust = 1 in {
   4705 // Unformatted
   4706 // .clamp variant
   // Unformatted 1D surface store (sust.b.1d), one 8-bit channel, .clamp
   // variant. No outputs; inputs are the surface operand $s (Int64Regs), the
   // coordinate $x (Int32Regs) and the value $r, which is passed in Int16Regs.
   // Empty pattern list: no selection pattern is attached to this record here.
   4707 def SUST_B_1D_B8_CLAMP
   4708   : NVPTXInst<(outs),
   4709               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4710               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4711               []>;
   // Unformatted 1D surface store (sust.b.1d), one 16-bit channel, .clamp
   // variant. No outputs; inputs are the surface operand $s (Int64Regs), the
   // coordinate $x (Int32Regs) and the value $r (Int16Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4712 def SUST_B_1D_B16_CLAMP
   4713   : NVPTXInst<(outs),
   4714               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4715               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4716               []>;
   // Unformatted 1D surface store (sust.b.1d), one 32-bit channel, .clamp
   // variant. No outputs; inputs are the surface operand $s (Int64Regs), the
   // coordinate $x (Int32Regs) and the value $r (Int32Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4717 def SUST_B_1D_B32_CLAMP
   4718   : NVPTXInst<(outs),
   4719               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   4720               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4721               []>;
   // Unformatted 1D surface store (sust.b.1d), one 64-bit channel, .clamp
   // variant. No outputs; inputs are the surface operand $s (Int64Regs), the
   // coordinate $x (Int32Regs) and the value $r (Int64Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4722 def SUST_B_1D_B64_CLAMP
   4723   : NVPTXInst<(outs),
   4724               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   4725               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4726               []>;
   // Unformatted 1D surface store, two 8-bit channels (.v2.b8), .clamp variant.
   // No outputs; inputs are the surface operand $s, the coordinate $x and the
   // values $r/$g, which are passed in Int16Regs.
   // Empty pattern list: no selection pattern is attached to this record here.
   4727 def SUST_B_1D_V2B8_CLAMP
   4728   : NVPTXInst<(outs),
   4729               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4730               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4731               []>;
   // Unformatted 1D surface store, two 16-bit channels (.v2.b16), .clamp
   // variant. No outputs; inputs are the surface operand $s, the coordinate $x
   // and the values $r/$g (Int16Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4732 def SUST_B_1D_V2B16_CLAMP
   4733   : NVPTXInst<(outs),
   4734               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4735               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4736               []>;
   // Unformatted 1D surface store, two 32-bit channels (.v2.b32), .clamp
   // variant. No outputs; inputs are the surface operand $s, the coordinate $x
   // and the values $r/$g (Int32Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4737 def SUST_B_1D_V2B32_CLAMP
   4738   : NVPTXInst<(outs),
   4739               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   4740               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4741               []>;
   // Unformatted 1D surface store, two 64-bit channels (.v2.b64), .clamp
   // variant. No outputs; inputs are the surface operand $s, the coordinate $x
   // and the values $r/$g (Int64Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4742 def SUST_B_1D_V2B64_CLAMP
   4743   : NVPTXInst<(outs),
   4744               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   4745               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4746               []>;
   // Unformatted 1D surface store, four 8-bit channels (.v4.b8), .clamp
   // variant. No outputs; inputs are the surface operand $s, the coordinate $x
   // and the values $r/$g/$b/$a, which are passed in Int16Regs.
   // Empty pattern list: no selection pattern is attached to this record here.
   4747 def SUST_B_1D_V4B8_CLAMP
   4748   : NVPTXInst<(outs),
   4749               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4750                    Int16Regs:$b, Int16Regs:$a),
   4751               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4752               []>;
   // Unformatted 1D surface store, four 16-bit channels (.v4.b16), .clamp
   // variant. No outputs; inputs are the surface operand $s, the coordinate $x
   // and the values $r/$g/$b/$a (Int16Regs).
   // Empty pattern list: no selection pattern is attached to this record here.
   4753 def SUST_B_1D_V4B16_CLAMP
   4754   : NVPTXInst<(outs),
   4755               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4756                    Int16Regs:$b, Int16Regs:$a),
   4757               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4758               []>;
   4759 def SUST_B_1D_V4B32_CLAMP
   4760   : NVPTXInst<(outs),
   4761               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   4762                    Int32Regs:$b, Int32Regs:$a),
   4763               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4764               []>;
   4765 
   4766 
   // sust.b.a1d.*.clamp records (the *_1D_ARRAY_* names print the "a1d"
   // geometry). Every def is an NVPTXInst with no outputs `(outs)`.
   // Inputs: $s (Int64Regs, printed first inside the bracketed operand --
   // presumably the surface reference; confirm), then $idx and $x (both
   // Int32Regs), printed as the coordinate list {$idx, $x} with $idx first,
   // then one, two (.v2) or four (.v4) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // The .v4 asm strings are split over two adjacent string literals.
   4767 def SUST_B_1D_ARRAY_B8_CLAMP
   4768   : NVPTXInst<(outs),
   4769               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4770               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4771               []>;
   4772 def SUST_B_1D_ARRAY_B16_CLAMP
   4773   : NVPTXInst<(outs),
   4774               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4775               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4776               []>;
   4777 def SUST_B_1D_ARRAY_B32_CLAMP
   4778   : NVPTXInst<(outs),
   4779               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   4780               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4781               []>;
   4782 def SUST_B_1D_ARRAY_B64_CLAMP
   4783   : NVPTXInst<(outs),
   4784               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   4785               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4786               []>;
   // Two-value (.v2) forms: values are $r, $g.
   4787 def SUST_B_1D_ARRAY_V2B8_CLAMP
   4788   : NVPTXInst<(outs),
   4789               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4790                    Int16Regs:$g),
   4791               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4792               []>;
   4793 def SUST_B_1D_ARRAY_V2B16_CLAMP
   4794   : NVPTXInst<(outs),
   4795               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4796                    Int16Regs:$g),
   4797               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4798               []>;
   4799 def SUST_B_1D_ARRAY_V2B32_CLAMP
   4800   : NVPTXInst<(outs),
   4801               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4802                    Int32Regs:$g),
   4803               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4804               []>;
   4805 def SUST_B_1D_ARRAY_V2B64_CLAMP
   4806   : NVPTXInst<(outs),
   4807               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   4808                    Int64Regs:$g),
   4809               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4810               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   4811 def SUST_B_1D_ARRAY_V4B8_CLAMP
   4812   : NVPTXInst<(outs),
   4813               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4814                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4815               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
   4816               "\\{$r, $g, $b, $a\\};",
   4817               []>;
   4818 def SUST_B_1D_ARRAY_V4B16_CLAMP
   4819   : NVPTXInst<(outs),
   4820               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4821                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4822              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
   4823              "\\{$r, $g, $b, $a\\};",
   4824               []>;
   4825 def SUST_B_1D_ARRAY_V4B32_CLAMP
   4826   : NVPTXInst<(outs),
   4827               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4828                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4829              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
   4830              "\\{$r, $g, $b, $a\\};",
   4831               []>;
   4832 
   4833 
   // sust.b.2d.*.clamp records. Every def is an NVPTXInst with no outputs
   // `(outs)`. Inputs: $s (Int64Regs, printed first inside the bracketed
   // operand -- presumably the surface reference; confirm), the coordinates
   // $x and $y (both Int32Regs, printed as {$x, $y}), then one, two (.v2) or
   // four (.v4) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // The .v4 asm strings are split over two adjacent string literals.
   4834 def SUST_B_2D_B8_CLAMP
   4835   : NVPTXInst<(outs),
   4836               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4837               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4838               []>;
   4839 def SUST_B_2D_B16_CLAMP
   4840   : NVPTXInst<(outs),
   4841               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4842               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4843               []>;
   4844 def SUST_B_2D_B32_CLAMP
   4845   : NVPTXInst<(outs),
   4846               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   4847               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4848               []>;
   4849 def SUST_B_2D_B64_CLAMP
   4850   : NVPTXInst<(outs),
   4851               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   4852               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4853               []>;
   // Two-value (.v2) forms: values are $r, $g.
   4854 def SUST_B_2D_V2B8_CLAMP
   4855   : NVPTXInst<(outs),
   4856               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4857                    Int16Regs:$g),
   4858               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4859               []>;
   4860 def SUST_B_2D_V2B16_CLAMP
   4861   : NVPTXInst<(outs),
   4862               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4863                    Int16Regs:$g),
   4864               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4865               []>;
   4866 def SUST_B_2D_V2B32_CLAMP
   4867   : NVPTXInst<(outs),
   4868               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4869                    Int32Regs:$g),
   4870               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4871               []>;
   4872 def SUST_B_2D_V2B64_CLAMP
   4873   : NVPTXInst<(outs),
   4874               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   4875                    Int64Regs:$g),
   4876               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4877               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   4878 def SUST_B_2D_V4B8_CLAMP
   4879   : NVPTXInst<(outs),
   4880               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4881                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4882               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
   4883               "\\{$r, $g, $b, $a\\};",
   4884               []>;
   4885 def SUST_B_2D_V4B16_CLAMP
   4886   : NVPTXInst<(outs),
   4887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4888                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4889              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
   4890              "\\{$r, $g, $b, $a\\};",
   4891               []>;
   4892 def SUST_B_2D_V4B32_CLAMP
   4893   : NVPTXInst<(outs),
   4894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4895                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4896              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
   4897              "\\{$r, $g, $b, $a\\};",
   4898               []>;
   4899 
   4900 
   // sust.b.a2d.*.clamp records (the *_2D_ARRAY_* names print the "a2d"
   // geometry). Every def is an NVPTXInst with no outputs `(outs)`.
   // Inputs: $s (Int64Regs, printed first inside the bracketed operand --
   // presumably the surface reference; confirm), then $idx, $x, $y (all
   // Int32Regs), then one, two (.v2) or four (.v4) value registers.
   // The coordinate list is printed with FOUR entries, {$idx, $x, $y, $y}:
   // $y appears twice although only three coordinate operands exist.
   // NOTE(review): this looks like deliberate padding to a four-element
   // coordinate vector -- confirm against the PTX ISA `sust` description
   // before treating the duplicate as a typo.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   4901 def SUST_B_2D_ARRAY_B8_CLAMP
   4902   : NVPTXInst<(outs),
   4903               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4904                    Int16Regs:$r),
   4905               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4906               []>;
   4907 def SUST_B_2D_ARRAY_B16_CLAMP
   4908   : NVPTXInst<(outs),
   4909               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4910                    Int16Regs:$r),
   4911               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4912               []>;
   4913 def SUST_B_2D_ARRAY_B32_CLAMP
   4914   : NVPTXInst<(outs),
   4915               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4916                    Int32Regs:$r),
   4917               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4918               []>;
   4919 def SUST_B_2D_ARRAY_B64_CLAMP
   4920   : NVPTXInst<(outs),
   4921               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4922                    Int64Regs:$r),
   4923               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4924               []>;
   // Two-value (.v2) forms: values are $r, $g.
   4925 def SUST_B_2D_ARRAY_V2B8_CLAMP
   4926   : NVPTXInst<(outs),
   4927               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4928                    Int16Regs:$r, Int16Regs:$g),
   4929               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4930               "\\{$r, $g\\};",
   4931               []>;
   4932 def SUST_B_2D_ARRAY_V2B16_CLAMP
   4933   : NVPTXInst<(outs),
   4934               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4935                    Int16Regs:$r, Int16Regs:$g),
   4936              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4937              "\\{$r, $g\\};",
   4938               []>;
   4939 def SUST_B_2D_ARRAY_V2B32_CLAMP
   4940   : NVPTXInst<(outs),
   4941               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4942                    Int32Regs:$r, Int32Regs:$g),
   4943              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4944              "\\{$r, $g\\};",
   4945               []>;
   4946 def SUST_B_2D_ARRAY_V2B64_CLAMP
   4947   : NVPTXInst<(outs),
   4948               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4949                    Int64Regs:$r, Int64Regs:$g),
   4950              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4951              "\\{$r, $g\\};",
   4952               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   4953 def SUST_B_2D_ARRAY_V4B8_CLAMP
   4954   : NVPTXInst<(outs),
   4955               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4956                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4957       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4958       "\\{$r, $g, $b, $a\\};",
   4959               []>;
   4960 def SUST_B_2D_ARRAY_V4B16_CLAMP
   4961   : NVPTXInst<(outs),
   4962               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4963                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4964      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4965      "\\{$r, $g, $b, $a\\};",
   4966               []>;
   4967 def SUST_B_2D_ARRAY_V4B32_CLAMP
   4968   : NVPTXInst<(outs),
   4969               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4970                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4971      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4972      "\\{$r, $g, $b, $a\\};",
   4973               []>;
   4974 
   4975 
   // sust.b.3d.*.clamp records. Every def is an NVPTXInst with no outputs
   // `(outs)`. Inputs: $s (Int64Regs, printed first inside the bracketed
   // operand -- presumably the surface reference; confirm), the coordinates
   // $x, $y, $z (all Int32Regs), then one, two (.v2) or four (.v4) value
   // registers.
   // The coordinate list is printed with FOUR entries, {$x, $y, $z, $z}:
   // $z appears twice although only three coordinate operands exist.
   // NOTE(review): this looks like deliberate padding to a four-element
   // coordinate vector -- confirm against the PTX ISA `sust` description
   // before treating the duplicate as a typo.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   4976 def SUST_B_3D_B8_CLAMP
   4977   : NVPTXInst<(outs),
   4978               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4979                    Int16Regs:$r),
   4980               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4981               []>;
   4982 def SUST_B_3D_B16_CLAMP
   4983   : NVPTXInst<(outs),
   4984               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4985                    Int16Regs:$r),
   4986               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4987               []>;
   4988 def SUST_B_3D_B32_CLAMP
   4989   : NVPTXInst<(outs),
   4990               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4991                    Int32Regs:$r),
   4992               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4993               []>;
   4994 def SUST_B_3D_B64_CLAMP
   4995   : NVPTXInst<(outs),
   4996               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4997                    Int64Regs:$r),
   4998               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4999               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5000 def SUST_B_3D_V2B8_CLAMP
   5001   : NVPTXInst<(outs),
   5002               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5003                    Int16Regs:$r, Int16Regs:$g),
   5004               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5005               "\\{$r, $g\\};",
   5006               []>;
   5007 def SUST_B_3D_V2B16_CLAMP
   5008   : NVPTXInst<(outs),
   5009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5010                    Int16Regs:$r, Int16Regs:$g),
   5011               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5012               "\\{$r, $g\\};",
   5013               []>;
   5014 def SUST_B_3D_V2B32_CLAMP
   5015   : NVPTXInst<(outs),
   5016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5017                    Int32Regs:$r, Int32Regs:$g),
   5018               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5019               "\\{$r, $g\\};",
   5020               []>;
   5021 def SUST_B_3D_V2B64_CLAMP
   5022   : NVPTXInst<(outs),
   5023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5024                    Int64Regs:$r, Int64Regs:$g),
   5025               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5026               "\\{$r, $g\\};",
   5027               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5028 def SUST_B_3D_V4B8_CLAMP
   5029   : NVPTXInst<(outs),
   5030               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5031                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5032          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5033          "\\{$r, $g, $b, $a\\};",
   5034               []>;
   5035 def SUST_B_3D_V4B16_CLAMP
   5036   : NVPTXInst<(outs),
   5037               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5038                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5039         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5040         "\\{$r, $g, $b, $a\\};",
   5041               []>;
   5042 def SUST_B_3D_V4B32_CLAMP
   5043   : NVPTXInst<(outs),
   5044               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5045                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5046         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   5047         "\\{$r, $g, $b, $a\\};",
   5048               []>;
   5049 
   5050 
   5051 // .trap variants: the sust.b records that follow print the ".trap" suffix in place of ".clamp".
   // sust.b.1d.*.trap records. Every def is an NVPTXInst with no outputs
   // `(outs)`. Inputs: $s (Int64Regs, printed first inside the bracketed
   // operand -- presumably the surface reference; confirm), the coordinate
   // $x (Int32Regs), then one, two (.v2: $r, $g) or four (.v4: $r, $g, $b,
   // $a) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // `\\{` / `\\}` presumably print literal braces around the vector operands.
   5052 def SUST_B_1D_B8_TRAP
   5053   : NVPTXInst<(outs),
   5054               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5055               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5056               []>;
   5057 def SUST_B_1D_B16_TRAP
   5058   : NVPTXInst<(outs),
   5059               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5060               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5061               []>;
   5062 def SUST_B_1D_B32_TRAP
   5063   : NVPTXInst<(outs),
   5064               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5065               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5066               []>;
   5067 def SUST_B_1D_B64_TRAP
   5068   : NVPTXInst<(outs),
   5069               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   5070               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5071               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5072 def SUST_B_1D_V2B8_TRAP
   5073   : NVPTXInst<(outs),
   5074               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5075               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5076               []>;
   5077 def SUST_B_1D_V2B16_TRAP
   5078   : NVPTXInst<(outs),
   5079               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5080               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5081               []>;
   5082 def SUST_B_1D_V2B32_TRAP
   5083   : NVPTXInst<(outs),
   5084               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5085               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5086               []>;
   5087 def SUST_B_1D_V2B64_TRAP
   5088   : NVPTXInst<(outs),
   5089               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   5090               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5091               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5092 def SUST_B_1D_V4B8_TRAP
   5093   : NVPTXInst<(outs),
   5094               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5095                    Int16Regs:$b, Int16Regs:$a),
   5096               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5097               []>;
   5098 def SUST_B_1D_V4B16_TRAP
   5099   : NVPTXInst<(outs),
   5100               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5101                    Int16Regs:$b, Int16Regs:$a),
   5102               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5103               []>;
   5104 def SUST_B_1D_V4B32_TRAP
   5105   : NVPTXInst<(outs),
   5106               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   5107                    Int32Regs:$b, Int32Regs:$a),
   5108               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5109               []>;
   5110 
   5111 
   // sust.b.a1d.*.trap records (the *_1D_ARRAY_* names print the "a1d"
   // geometry). Every def is an NVPTXInst with no outputs `(outs)`.
   // Inputs: $s (Int64Regs, printed first inside the bracketed operand --
   // presumably the surface reference; confirm), then $idx and $x (both
   // Int32Regs), printed as the coordinate list {$idx, $x} with $idx first,
   // then one, two (.v2) or four (.v4) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // The .v4 asm strings are split over two adjacent string literals.
   5112 def SUST_B_1D_ARRAY_B8_TRAP
   5113   : NVPTXInst<(outs),
   5114               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5115               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5116               []>;
   5117 def SUST_B_1D_ARRAY_B16_TRAP
   5118   : NVPTXInst<(outs),
   5119               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5120               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5121               []>;
   5122 def SUST_B_1D_ARRAY_B32_TRAP
   5123   : NVPTXInst<(outs),
   5124               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   5125               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5126               []>;
   5127 def SUST_B_1D_ARRAY_B64_TRAP
   5128   : NVPTXInst<(outs),
   5129               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   5130               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5131               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5132 def SUST_B_1D_ARRAY_V2B8_TRAP
   5133   : NVPTXInst<(outs),
   5134               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5135                    Int16Regs:$g),
   5136               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5137               []>;
   5138 def SUST_B_1D_ARRAY_V2B16_TRAP
   5139   : NVPTXInst<(outs),
   5140               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5141                    Int16Regs:$g),
   5142               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5143               []>;
   5144 def SUST_B_1D_ARRAY_V2B32_TRAP
   5145   : NVPTXInst<(outs),
   5146               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5147                    Int32Regs:$g),
   5148               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5149               []>;
   5150 def SUST_B_1D_ARRAY_V2B64_TRAP
   5151   : NVPTXInst<(outs),
   5152               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   5153                    Int64Regs:$g),
   5154               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5155               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5156 def SUST_B_1D_ARRAY_V4B8_TRAP
   5157   : NVPTXInst<(outs),
   5158               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5159                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5160               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
   5161               "\\{$r, $g, $b, $a\\};",
   5162               []>;
   5163 def SUST_B_1D_ARRAY_V4B16_TRAP
   5164   : NVPTXInst<(outs),
   5165               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5166                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5167              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
   5168              "\\{$r, $g, $b, $a\\};",
   5169               []>;
   5170 def SUST_B_1D_ARRAY_V4B32_TRAP
   5171   : NVPTXInst<(outs),
   5172               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5173                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5174              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
   5175              "\\{$r, $g, $b, $a\\};",
   5176               []>;
   5177 
   5178 
   // sust.b.2d.*.trap records. Every def is an NVPTXInst with no outputs
   // `(outs)`. Inputs: $s (Int64Regs, printed first inside the bracketed
   // operand -- presumably the surface reference; confirm), the coordinates
   // $x and $y (both Int32Regs, printed as {$x, $y}), then one, two (.v2) or
   // four (.v4) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // The .v4 asm strings are split over two adjacent string literals.
   5179 def SUST_B_2D_B8_TRAP
   5180   : NVPTXInst<(outs),
   5181               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5182               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5183               []>;
   5184 def SUST_B_2D_B16_TRAP
   5185   : NVPTXInst<(outs),
   5186               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5187               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5188               []>;
   5189 def SUST_B_2D_B32_TRAP
   5190   : NVPTXInst<(outs),
   5191               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5192               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5193               []>;
   5194 def SUST_B_2D_B64_TRAP
   5195   : NVPTXInst<(outs),
   5196               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   5197               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5198               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5199 def SUST_B_2D_V2B8_TRAP
   5200   : NVPTXInst<(outs),
   5201               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5202                    Int16Regs:$g),
   5203               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5204               []>;
   5205 def SUST_B_2D_V2B16_TRAP
   5206   : NVPTXInst<(outs),
   5207               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5208                    Int16Regs:$g),
   5209               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5210               []>;
   5211 def SUST_B_2D_V2B32_TRAP
   5212   : NVPTXInst<(outs),
   5213               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5214                    Int32Regs:$g),
   5215               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5216               []>;
   5217 def SUST_B_2D_V2B64_TRAP
   5218   : NVPTXInst<(outs),
   5219               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   5220                    Int64Regs:$g),
   5221               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5222               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5223 def SUST_B_2D_V4B8_TRAP
   5224   : NVPTXInst<(outs),
   5225               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5226                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5227               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
   5228               "\\{$r, $g, $b, $a\\};",
   5229               []>;
   5230 def SUST_B_2D_V4B16_TRAP
   5231   : NVPTXInst<(outs),
   5232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5233                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5234              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
   5235              "\\{$r, $g, $b, $a\\};",
   5236               []>;
   5237 def SUST_B_2D_V4B32_TRAP
   5238   : NVPTXInst<(outs),
   5239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5240                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5241              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
   5242              "\\{$r, $g, $b, $a\\};",
   5243               []>;
   5244 
   5245 
   // sust.b.a2d.*.trap records (the *_2D_ARRAY_* names print the "a2d"
   // geometry). Every def is an NVPTXInst with no outputs `(outs)`.
   // Inputs: $s (Int64Regs, printed first inside the bracketed operand --
   // presumably the surface reference; confirm), then $idx, $x, $y (all
   // Int32Regs), then one, two (.v2) or four (.v4) value registers.
   // The coordinate list is printed with FOUR entries, {$idx, $x, $y, $y}:
   // $y appears twice although only three coordinate operands exist.
   // NOTE(review): this looks like deliberate padding to a four-element
   // coordinate vector -- confirm against the PTX ISA `sust` description
   // before treating the duplicate as a typo.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   5246 def SUST_B_2D_ARRAY_B8_TRAP
   5247   : NVPTXInst<(outs),
   5248               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5249                    Int16Regs:$r),
   5250               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5251               []>;
   5252 def SUST_B_2D_ARRAY_B16_TRAP
   5253   : NVPTXInst<(outs),
   5254               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5255                    Int16Regs:$r),
   5256               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5257               []>;
   5258 def SUST_B_2D_ARRAY_B32_TRAP
   5259   : NVPTXInst<(outs),
   5260               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5261                    Int32Regs:$r),
   5262               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5263               []>;
   5264 def SUST_B_2D_ARRAY_B64_TRAP
   5265   : NVPTXInst<(outs),
   5266               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5267                    Int64Regs:$r),
   5268               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5269               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5270 def SUST_B_2D_ARRAY_V2B8_TRAP
   5271   : NVPTXInst<(outs),
   5272               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5273                    Int16Regs:$r, Int16Regs:$g),
   5274               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5275               "\\{$r, $g\\};",
   5276               []>;
   5277 def SUST_B_2D_ARRAY_V2B16_TRAP
   5278   : NVPTXInst<(outs),
   5279               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5280                    Int16Regs:$r, Int16Regs:$g),
   5281              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5282              "\\{$r, $g\\};",
   5283               []>;
   5284 def SUST_B_2D_ARRAY_V2B32_TRAP
   5285   : NVPTXInst<(outs),
   5286               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5287                    Int32Regs:$r, Int32Regs:$g),
   5288              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5289              "\\{$r, $g\\};",
   5290               []>;
   5291 def SUST_B_2D_ARRAY_V2B64_TRAP
   5292   : NVPTXInst<(outs),
   5293               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5294                    Int64Regs:$r, Int64Regs:$g),
   5295              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5296              "\\{$r, $g\\};",
   5297               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5298 def SUST_B_2D_ARRAY_V4B8_TRAP
   5299   : NVPTXInst<(outs),
   5300               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5301                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5302       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5303       "\\{$r, $g, $b, $a\\};",
   5304               []>;
   5305 def SUST_B_2D_ARRAY_V4B16_TRAP
   5306   : NVPTXInst<(outs),
   5307               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5308                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5309      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5310      "\\{$r, $g, $b, $a\\};",
   5311               []>;
   5312 def SUST_B_2D_ARRAY_V4B32_TRAP
   5313   : NVPTXInst<(outs),
   5314               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5315                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5316      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5317      "\\{$r, $g, $b, $a\\};",
   5318               []>;
   5319 
   5320 
   // sust.b.3d.*.trap records. Every def is an NVPTXInst with no outputs
   // `(outs)`. Inputs: $s (Int64Regs, printed first inside the bracketed
   // operand -- presumably the surface reference; confirm), the coordinates
   // $x, $y, $z (all Int32Regs), then one, two (.v2) or four (.v4) value
   // registers.
   // The coordinate list is printed with FOUR entries, {$x, $y, $z, $z}:
   // $z appears twice although only three coordinate operands exist.
   // NOTE(review): this looks like deliberate padding to a four-element
   // coordinate vector -- confirm against the PTX ISA `sust` description
   // before treating the duplicate as a typo.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs; there is no .v4.b64 form in this group.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   5321 def SUST_B_3D_B8_TRAP
   5322   : NVPTXInst<(outs),
   5323               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5324                    Int16Regs:$r),
   5325               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5326               []>;
   5327 def SUST_B_3D_B16_TRAP
   5328   : NVPTXInst<(outs),
   5329               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5330                    Int16Regs:$r),
   5331               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5332               []>;
   5333 def SUST_B_3D_B32_TRAP
   5334   : NVPTXInst<(outs),
   5335               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5336                    Int32Regs:$r),
   5337               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5338               []>;
   5339 def SUST_B_3D_B64_TRAP
   5340   : NVPTXInst<(outs),
   5341               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5342                    Int64Regs:$r),
   5343               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5344               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5345 def SUST_B_3D_V2B8_TRAP
   5346   : NVPTXInst<(outs),
   5347               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5348                    Int16Regs:$r, Int16Regs:$g),
   5349               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5350               "\\{$r, $g\\};",
   5351               []>;
   5352 def SUST_B_3D_V2B16_TRAP
   5353   : NVPTXInst<(outs),
   5354               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5355                    Int16Regs:$r, Int16Regs:$g),
   5356               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5357               "\\{$r, $g\\};",
   5358               []>;
   5359 def SUST_B_3D_V2B32_TRAP
   5360   : NVPTXInst<(outs),
   5361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5362                    Int32Regs:$r, Int32Regs:$g),
   5363               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5364               "\\{$r, $g\\};",
   5365               []>;
   5366 def SUST_B_3D_V2B64_TRAP
   5367   : NVPTXInst<(outs),
   5368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5369                    Int64Regs:$r, Int64Regs:$g),
   5370               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5371               "\\{$r, $g\\};",
   5372               []>;
   // Four-value (.v4) forms: values are $r, $g, $b, $a.
   5373 def SUST_B_3D_V4B8_TRAP
   5374   : NVPTXInst<(outs),
   5375               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5376                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5377          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5378          "\\{$r, $g, $b, $a\\};",
   5379               []>;
   5380 def SUST_B_3D_V4B16_TRAP
   5381   : NVPTXInst<(outs),
   5382               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5383                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5384         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5385         "\\{$r, $g, $b, $a\\};",
   5386               []>;
   5387 def SUST_B_3D_V4B32_TRAP
   5388   : NVPTXInst<(outs),
   5389               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5390                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5391         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5392         "\\{$r, $g, $b, $a\\};",
   5393               []>;
   5394 
   5395 
   5396 // .zero variants: the sust.b records that follow print the ".zero" suffix in place of ".clamp"/".trap".
   // sust.b.1d.*.zero records, from the 8-bit scalar form through .v4.b8.
   // Every def is an NVPTXInst with no outputs `(outs)`. Inputs: $s
   // (Int64Regs, printed first inside the bracketed operand -- presumably the
   // surface reference; confirm), the coordinate $x (Int32Regs), then one,
   // two (.v2: $r, $g) or four (.v4: $r, $g, $b, $a) value registers.
   // .b8 and .b16 values use Int16Regs, .b32 uses Int32Regs, .b64 uses
   // Int64Regs.
   // The pattern list is empty ([]); presumably selection happens elsewhere.
   // `\\{` / `\\}` presumably print literal braces around the vector operands.
   5397 def SUST_B_1D_B8_ZERO
   5398   : NVPTXInst<(outs),
   5399               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5400               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5401               []>;
   5402 def SUST_B_1D_B16_ZERO
   5403   : NVPTXInst<(outs),
   5404               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5405               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5406               []>;
   5407 def SUST_B_1D_B32_ZERO
   5408   : NVPTXInst<(outs),
   5409               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5410               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5411               []>;
   5412 def SUST_B_1D_B64_ZERO
   5413   : NVPTXInst<(outs),
   5414               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   5415               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5416               []>;
   // Two-value (.v2) forms: values are $r, $g.
   5417 def SUST_B_1D_V2B8_ZERO
   5418   : NVPTXInst<(outs),
   5419               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5420               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5421               []>;
   5422 def SUST_B_1D_V2B16_ZERO
   5423   : NVPTXInst<(outs),
   5424               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5425               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5426               []>;
   5427 def SUST_B_1D_V2B32_ZERO
   5428   : NVPTXInst<(outs),
   5429               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5430               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5431               []>;
   5432 def SUST_B_1D_V2B64_ZERO
   5433   : NVPTXInst<(outs),
   5434               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   5435               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5436               []>;
   // Four-value (.v4) form: values are $r, $g, $b, $a.
   5437 def SUST_B_1D_V4B8_ZERO
   5438   : NVPTXInst<(outs),
   5439               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5440                    Int16Regs:$b, Int16Regs:$a),
   5441               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5442               []>;
   5443 def SUST_B_1D_V4B16_ZERO
   5444   : NVPTXInst<(outs),
   5445               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5446                    Int16Regs:$b, Int16Regs:$a),
   5447               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5448               []>;
   5449 def SUST_B_1D_V4B32_ZERO
   5450   : NVPTXInst<(outs),
   5451               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   5452                    Int32Regs:$b, Int32Regs:$a),
   5453               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5454               []>;
   5455 
   5456 
        // SUST_B_1D_ARRAY_*_ZERO: instructions printing
        //   sust.b.a1d[.v2|.v4].<b8|b16|b32|b64>.zero [$s, {$idx, $x}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // then $idx and $x (both Int32Regs), printed in that order as the
        // coordinate vector, then 1, 2 or 4 stored components $r, $g, $b, $a.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs;
        // b64 -> Int64Regs. There is no v4.b64 form in this group.
        // The pattern list is empty ([]) on every def.
   5457 def SUST_B_1D_ARRAY_B8_ZERO
   5458   : NVPTXInst<(outs),
   5459               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5460               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5461               []>;
   5462 def SUST_B_1D_ARRAY_B16_ZERO
   5463   : NVPTXInst<(outs),
   5464               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5465               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5466               []>;
   5467 def SUST_B_1D_ARRAY_B32_ZERO
   5468   : NVPTXInst<(outs),
   5469               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   5470               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5471               []>;
   5472 def SUST_B_1D_ARRAY_B64_ZERO
   5473   : NVPTXInst<(outs),
   5474               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   5475               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5476               []>;
        // Two-component (v2) forms.
   5477 def SUST_B_1D_ARRAY_V2B8_ZERO
   5478   : NVPTXInst<(outs),
   5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5480                    Int16Regs:$g),
   5481               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5482               []>;
   5483 def SUST_B_1D_ARRAY_V2B16_ZERO
   5484   : NVPTXInst<(outs),
   5485               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5486                    Int16Regs:$g),
   5487               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5488               []>;
   5489 def SUST_B_1D_ARRAY_V2B32_ZERO
   5490   : NVPTXInst<(outs),
   5491               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5492                    Int32Regs:$g),
   5493               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5494               []>;
   5495 def SUST_B_1D_ARRAY_V2B64_ZERO
   5496   : NVPTXInst<(outs),
   5497               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   5498                    Int64Regs:$g),
   5499               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5500               []>;
        // Four-component (v4) forms; the asm string is split across two
        // adjacent literals that TableGen concatenates.
   5501 def SUST_B_1D_ARRAY_V4B8_ZERO
   5502   : NVPTXInst<(outs),
   5503               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5504                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5505               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
   5506               "\\{$r, $g, $b, $a\\};",
   5507               []>;
   5508 def SUST_B_1D_ARRAY_V4B16_ZERO
   5509   : NVPTXInst<(outs),
   5510               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5511                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5512              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
   5513              "\\{$r, $g, $b, $a\\};",
   5514               []>;
   5515 def SUST_B_1D_ARRAY_V4B32_ZERO
   5516   : NVPTXInst<(outs),
   5517               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5518                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5519              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
   5520              "\\{$r, $g, $b, $a\\};",
   5521               []>;
   5522 
   5523 
        // SUST_B_2D_*_ZERO: instructions printing
        //   sust.b.2d[.v2|.v4].<b8|b16|b32|b64>.zero [$s, {$x, $y}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // the Int32Regs coordinates $x and $y, then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs;
        // b64 -> Int64Regs. There is no v4.b64 form in this group.
        // The pattern list is empty ([]) on every def.
   5524 def SUST_B_2D_B8_ZERO
   5525   : NVPTXInst<(outs),
   5526               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5527               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5528               []>;
   5529 def SUST_B_2D_B16_ZERO
   5530   : NVPTXInst<(outs),
   5531               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5532               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5533               []>;
   5534 def SUST_B_2D_B32_ZERO
   5535   : NVPTXInst<(outs),
   5536               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5537               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5538               []>;
   5539 def SUST_B_2D_B64_ZERO
   5540   : NVPTXInst<(outs),
   5541               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   5542               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5543               []>;
        // Two-component (v2) forms.
   5544 def SUST_B_2D_V2B8_ZERO
   5545   : NVPTXInst<(outs),
   5546               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5547                    Int16Regs:$g),
   5548               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5549               []>;
   5550 def SUST_B_2D_V2B16_ZERO
   5551   : NVPTXInst<(outs),
   5552               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5553                    Int16Regs:$g),
   5554               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5555               []>;
   5556 def SUST_B_2D_V2B32_ZERO
   5557   : NVPTXInst<(outs),
   5558               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5559                    Int32Regs:$g),
   5560               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5561               []>;
   5562 def SUST_B_2D_V2B64_ZERO
   5563   : NVPTXInst<(outs),
   5564               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   5565                    Int64Regs:$g),
   5566               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5567               []>;
        // Four-component (v4) forms; the asm string is split across two
        // adjacent literals that TableGen concatenates.
   5568 def SUST_B_2D_V4B8_ZERO
   5569   : NVPTXInst<(outs),
   5570               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5571                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5572               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
   5573               "\\{$r, $g, $b, $a\\};",
   5574               []>;
   5575 def SUST_B_2D_V4B16_ZERO
   5576   : NVPTXInst<(outs),
   5577               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5578                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5579              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
   5580              "\\{$r, $g, $b, $a\\};",
   5581               []>;
   5582 def SUST_B_2D_V4B32_ZERO
   5583   : NVPTXInst<(outs),
   5584               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5585                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5586              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
   5587              "\\{$r, $g, $b, $a\\};",
   5588               []>;
   5589 
   5590 
        // SUST_B_2D_ARRAY_*_ZERO: instructions printing
        //   sust.b.a2d[.v2|.v4].<b8|b16|b32|b64>.zero
        //       [$s, {$idx, $x, $y, $y}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // then $idx, $x, $y (all Int32Regs), then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // The coordinate vector is printed with four elements, repeating $y.
        // NOTE(review): presumably PTX wants a 4-element coordinate vector for
        // a2d surfaces and ignores the last element -- confirm in the PTX ISA.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs;
        // b64 -> Int64Regs. There is no v4.b64 form in this group.
        // The pattern list is empty ([]) on every def.
   5591 def SUST_B_2D_ARRAY_B8_ZERO
   5592   : NVPTXInst<(outs),
   5593               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5594                    Int16Regs:$r),
   5595               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5596               []>;
   5597 def SUST_B_2D_ARRAY_B16_ZERO
   5598   : NVPTXInst<(outs),
   5599               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5600                    Int16Regs:$r),
   5601               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5602               []>;
   5603 def SUST_B_2D_ARRAY_B32_ZERO
   5604   : NVPTXInst<(outs),
   5605               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5606                    Int32Regs:$r),
   5607               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5608               []>;
   5609 def SUST_B_2D_ARRAY_B64_ZERO
   5610   : NVPTXInst<(outs),
   5611               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5612                    Int64Regs:$r),
   5613               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5614               []>;
        // Two-component (v2) forms.
   5615 def SUST_B_2D_ARRAY_V2B8_ZERO
   5616   : NVPTXInst<(outs),
   5617               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5618                    Int16Regs:$r, Int16Regs:$g),
   5619               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5620               "\\{$r, $g\\};",
   5621               []>;
   5622 def SUST_B_2D_ARRAY_V2B16_ZERO
   5623   : NVPTXInst<(outs),
   5624               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5625                    Int16Regs:$r, Int16Regs:$g),
   5626              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5627              "\\{$r, $g\\};",
   5628               []>;
   5629 def SUST_B_2D_ARRAY_V2B32_ZERO
   5630   : NVPTXInst<(outs),
   5631               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5632                    Int32Regs:$r, Int32Regs:$g),
   5633              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5634              "\\{$r, $g\\};",
   5635               []>;
   5636 def SUST_B_2D_ARRAY_V2B64_ZERO
   5637   : NVPTXInst<(outs),
   5638               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5639                    Int64Regs:$r, Int64Regs:$g),
   5640              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5641              "\\{$r, $g\\};",
   5642               []>;
        // Four-component (v4) forms.
   5643 def SUST_B_2D_ARRAY_V4B8_ZERO
   5644   : NVPTXInst<(outs),
   5645               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5646                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5647       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5648       "\\{$r, $g, $b, $a\\};",
   5649               []>;
   5650 def SUST_B_2D_ARRAY_V4B16_ZERO
   5651   : NVPTXInst<(outs),
   5652               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5653                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5654      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5655      "\\{$r, $g, $b, $a\\};",
   5656               []>;
   5657 def SUST_B_2D_ARRAY_V4B32_ZERO
   5658   : NVPTXInst<(outs),
   5659               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5660                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5661      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5662      "\\{$r, $g, $b, $a\\};",
   5663               []>;
   5664 
   5665 
        // SUST_B_3D_*_ZERO: instructions printing
        //   sust.b.3d[.v2|.v4].<b8|b16|b32|b64>.zero
        //       [$s, {$x, $y, $z, $z}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // the Int32Regs coordinates $x, $y, $z, then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // The coordinate vector is printed with four elements, repeating $z.
        // NOTE(review): presumably PTX wants a 4-element coordinate vector for
        // 3d surfaces and ignores the last element -- confirm in the PTX ISA.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs;
        // b64 -> Int64Regs. There is no v4.b64 form in this group.
        // The pattern list is empty ([]) on every def.
   5666 def SUST_B_3D_B8_ZERO
   5667   : NVPTXInst<(outs),
   5668               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5669                    Int16Regs:$r),
   5670               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5671               []>;
   5672 def SUST_B_3D_B16_ZERO
   5673   : NVPTXInst<(outs),
   5674               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5675                    Int16Regs:$r),
   5676               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5677               []>;
   5678 def SUST_B_3D_B32_ZERO
   5679   : NVPTXInst<(outs),
   5680               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5681                    Int32Regs:$r),
   5682               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5683               []>;
   5684 def SUST_B_3D_B64_ZERO
   5685   : NVPTXInst<(outs),
   5686               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5687                    Int64Regs:$r),
   5688               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5689               []>;
        // Two-component (v2) forms.
   5690 def SUST_B_3D_V2B8_ZERO
   5691   : NVPTXInst<(outs),
   5692               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5693                    Int16Regs:$r, Int16Regs:$g),
   5694               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5695               "\\{$r, $g\\};",
   5696               []>;
   5697 def SUST_B_3D_V2B16_ZERO
   5698   : NVPTXInst<(outs),
   5699               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5700                    Int16Regs:$r, Int16Regs:$g),
   5701               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5702               "\\{$r, $g\\};",
   5703               []>;
   5704 def SUST_B_3D_V2B32_ZERO
   5705   : NVPTXInst<(outs),
   5706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5707                    Int32Regs:$r, Int32Regs:$g),
   5708               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5709               "\\{$r, $g\\};",
   5710               []>;
   5711 def SUST_B_3D_V2B64_ZERO
   5712   : NVPTXInst<(outs),
   5713               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5714                    Int64Regs:$r, Int64Regs:$g),
   5715               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5716               "\\{$r, $g\\};",
   5717               []>;
        // Four-component (v4) forms.
   5718 def SUST_B_3D_V4B8_ZERO
   5719   : NVPTXInst<(outs),
   5720               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5721                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5722          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5723          "\\{$r, $g, $b, $a\\};",
   5724               []>;
   5725 def SUST_B_3D_V4B16_ZERO
   5726   : NVPTXInst<(outs),
   5727               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5728                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5729         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5730         "\\{$r, $g, $b, $a\\};",
   5731               []>;
   5732 def SUST_B_3D_V4B32_ZERO
   5733   : NVPTXInst<(outs),
   5734               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5735                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5736         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5737         "\\{$r, $g, $b, $a\\};",
   5738               []>;
   5739 
   5740 
   5741 
   5742 // Formatted
        //
        // SUST_P_1D_*_TRAP: instructions printing
        //   sust.p.1d[.v2|.v4].<b8|b16|b32>.trap [$s, {$x}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // the Int32Regs coordinate $x, and 1, 2 or 4 stored components
        // $r, $g, $b, $a.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs.
        // This group defines no b64 forms.
        // The pattern list is empty ([]) on every def.
   5743 
   5744 def SUST_P_1D_B8_TRAP
   5745   : NVPTXInst<(outs),
   5746               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5747               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5748               []>;
   5749 def SUST_P_1D_B16_TRAP
   5750   : NVPTXInst<(outs),
   5751               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5752               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5753               []>;
   5754 def SUST_P_1D_B32_TRAP
   5755   : NVPTXInst<(outs),
   5756               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5757               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5758               []>;
        // Two-component (v2) forms.
   5759 def SUST_P_1D_V2B8_TRAP
   5760   : NVPTXInst<(outs),
   5761               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5762               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5763               []>;
   5764 def SUST_P_1D_V2B16_TRAP
   5765   : NVPTXInst<(outs),
   5766               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5767               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5768               []>;
   5769 def SUST_P_1D_V2B32_TRAP
   5770   : NVPTXInst<(outs),
   5771               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5772               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5773               []>;
        // Four-component (v4) forms.
   5774 def SUST_P_1D_V4B8_TRAP
   5775   : NVPTXInst<(outs),
   5776               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5777                    Int16Regs:$b, Int16Regs:$a),
   5778               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5779               []>;
   5780 def SUST_P_1D_V4B16_TRAP
   5781   : NVPTXInst<(outs),
   5782               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5783                    Int16Regs:$b, Int16Regs:$a),
   5784               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5785               []>;
   5786 def SUST_P_1D_V4B32_TRAP
   5787   : NVPTXInst<(outs),
   5788               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   5789                    Int32Regs:$b, Int32Regs:$a),
   5790               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5791               []>;
   5792 
   5793 
        // SUST_P_1D_ARRAY_*_TRAP: instructions printing
        //   sust.p.a1d[.v2|.v4].<b8|b16|b32>.trap [$s, {$idx, $x}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // then $idx and $x (both Int32Regs), printed in that order as the
        // coordinate vector, then 1, 2 or 4 stored components $r, $g, $b, $a.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs.
        // This group defines no b64 forms.
        // The pattern list is empty ([]) on every def.
   5794 def SUST_P_1D_ARRAY_B8_TRAP
   5795   : NVPTXInst<(outs),
   5796               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5797               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5798               []>;
   5799 def SUST_P_1D_ARRAY_B16_TRAP
   5800   : NVPTXInst<(outs),
   5801               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5802               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5803               []>;
   5804 def SUST_P_1D_ARRAY_B32_TRAP
   5805   : NVPTXInst<(outs),
   5806               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   5807               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5808               []>;
        // Two-component (v2) forms.
   5809 def SUST_P_1D_ARRAY_V2B8_TRAP
   5810   : NVPTXInst<(outs),
   5811               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5812                    Int16Regs:$g),
   5813               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5814               []>;
   5815 def SUST_P_1D_ARRAY_V2B16_TRAP
   5816   : NVPTXInst<(outs),
   5817               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5818                    Int16Regs:$g),
   5819               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5820               []>;
   5821 def SUST_P_1D_ARRAY_V2B32_TRAP
   5822   : NVPTXInst<(outs),
   5823               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5824                    Int32Regs:$g),
   5825               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5826               []>;
        // Four-component (v4) forms; the asm string is split across two
        // adjacent literals that TableGen concatenates.
   5827 def SUST_P_1D_ARRAY_V4B8_TRAP
   5828   : NVPTXInst<(outs),
   5829               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5830                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5831               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
   5832               "\\{$r, $g, $b, $a\\};",
   5833               []>;
   5834 def SUST_P_1D_ARRAY_V4B16_TRAP
   5835   : NVPTXInst<(outs),
   5836               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5837                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5838              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
   5839              "\\{$r, $g, $b, $a\\};",
   5840               []>;
   5841 def SUST_P_1D_ARRAY_V4B32_TRAP
   5842   : NVPTXInst<(outs),
   5843               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5844                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5845              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
   5846              "\\{$r, $g, $b, $a\\};",
   5847               []>;
   5848 
   5849 
        // SUST_P_2D_*_TRAP: instructions printing
        //   sust.p.2d[.v2|.v4].<b8|b16|b32>.trap [$s, {$x, $y}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // the Int32Regs coordinates $x and $y, then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs.
        // This group defines no b64 forms.
        // The pattern list is empty ([]) on every def.
   5850 def SUST_P_2D_B8_TRAP
   5851   : NVPTXInst<(outs),
   5852               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5853               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5854               []>;
   5855 def SUST_P_2D_B16_TRAP
   5856   : NVPTXInst<(outs),
   5857               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5858               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5859               []>;
   5860 def SUST_P_2D_B32_TRAP
   5861   : NVPTXInst<(outs),
   5862               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5863               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5864               []>;
        // Two-component (v2) forms.
   5865 def SUST_P_2D_V2B8_TRAP
   5866   : NVPTXInst<(outs),
   5867               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5868                    Int16Regs:$g),
   5869               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5870               []>;
   5871 def SUST_P_2D_V2B16_TRAP
   5872   : NVPTXInst<(outs),
   5873               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5874                    Int16Regs:$g),
   5875               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5876               []>;
   5877 def SUST_P_2D_V2B32_TRAP
   5878   : NVPTXInst<(outs),
   5879               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5880                    Int32Regs:$g),
   5881               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5882               []>;
        // Four-component (v4) forms; the asm string is split across two
        // adjacent literals that TableGen concatenates.
   5883 def SUST_P_2D_V4B8_TRAP
   5884   : NVPTXInst<(outs),
   5885               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5886                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5887               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
   5888               "\\{$r, $g, $b, $a\\};",
   5889               []>;
   5890 def SUST_P_2D_V4B16_TRAP
   5891   : NVPTXInst<(outs),
   5892               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5893                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5894              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
   5895              "\\{$r, $g, $b, $a\\};",
   5896               []>;
   5897 def SUST_P_2D_V4B32_TRAP
   5898   : NVPTXInst<(outs),
   5899               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5900                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5901              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
   5902              "\\{$r, $g, $b, $a\\};",
   5903               []>;
   5904 
   5905 
        // SUST_P_2D_ARRAY_*_TRAP: instructions printing
        //   sust.p.a2d[.v2|.v4].<b8|b16|b32>.trap
        //       [$s, {$idx, $x, $y, $y}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // then $idx, $x, $y (all Int32Regs), then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // The coordinate vector is printed with four elements, repeating $y.
        // NOTE(review): presumably PTX wants a 4-element coordinate vector for
        // a2d surfaces and ignores the last element -- confirm in the PTX ISA.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs.
        // This group defines no b64 forms.
        // The pattern list is empty ([]) on every def.
   5906 def SUST_P_2D_ARRAY_B8_TRAP
   5907   : NVPTXInst<(outs),
   5908               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5909                    Int16Regs:$r),
   5910               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5911               []>;
   5912 def SUST_P_2D_ARRAY_B16_TRAP
   5913   : NVPTXInst<(outs),
   5914               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5915                    Int16Regs:$r),
   5916               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5917               []>;
   5918 def SUST_P_2D_ARRAY_B32_TRAP
   5919   : NVPTXInst<(outs),
   5920               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5921                    Int32Regs:$r),
   5922               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5923               []>;
        // Two-component (v2) forms.
   5924 def SUST_P_2D_ARRAY_V2B8_TRAP
   5925   : NVPTXInst<(outs),
   5926               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5927                    Int16Regs:$r, Int16Regs:$g),
   5928               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5929               "\\{$r, $g\\};",
   5930               []>;
   5931 def SUST_P_2D_ARRAY_V2B16_TRAP
   5932   : NVPTXInst<(outs),
   5933               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5934                    Int16Regs:$r, Int16Regs:$g),
   5935              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5936              "\\{$r, $g\\};",
   5937               []>;
   5938 def SUST_P_2D_ARRAY_V2B32_TRAP
   5939   : NVPTXInst<(outs),
   5940               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5941                    Int32Regs:$r, Int32Regs:$g),
   5942              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5943              "\\{$r, $g\\};",
   5944               []>;
        // Four-component (v4) forms.
   5945 def SUST_P_2D_ARRAY_V4B8_TRAP
   5946   : NVPTXInst<(outs),
   5947               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5948                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5949       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5950       "\\{$r, $g, $b, $a\\};",
   5951               []>;
   5952 def SUST_P_2D_ARRAY_V4B16_TRAP
   5953   : NVPTXInst<(outs),
   5954               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5955                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5956      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5957      "\\{$r, $g, $b, $a\\};",
   5958               []>;
   5959 def SUST_P_2D_ARRAY_V4B32_TRAP
   5960   : NVPTXInst<(outs),
   5961               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5962                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5963      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5964      "\\{$r, $g, $b, $a\\};",
   5965               []>;
   5966 
   5967 
        // SUST_P_3D_*_TRAP: instructions printing
        //   sust.p.3d[.v2|.v4].<b8|b16|b32>.trap
        //       [$s, {$x, $y, $z, $z}], {values};
        // Every def has no outputs. Inputs are $s (Int64Regs, printed first
        // inside the brackets -- presumably the surface handle, confirm),
        // the Int32Regs coordinates $x, $y, $z, then 1, 2 or 4 stored
        // components $r, $g, $b, $a.
        // The coordinate vector is printed with four elements, repeating $z.
        // NOTE(review): presumably PTX wants a 4-element coordinate vector for
        // 3d surfaces and ignores the last element -- confirm in the PTX ISA.
        // b8 and b16 components are both Int16Regs; b32 -> Int32Regs.
        // This group defines no b64 forms.
        // The pattern list is empty ([]) on every def.
   5968 def SUST_P_3D_B8_TRAP
   5969   : NVPTXInst<(outs),
   5970               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5971                    Int16Regs:$r),
   5972               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5973               []>;
   5974 def SUST_P_3D_B16_TRAP
   5975   : NVPTXInst<(outs),
   5976               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5977                    Int16Regs:$r),
   5978               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5979               []>;
   5980 def SUST_P_3D_B32_TRAP
   5981   : NVPTXInst<(outs),
   5982               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5983                    Int32Regs:$r),
   5984               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5985               []>;
        // Two-component (v2) forms.
   5986 def SUST_P_3D_V2B8_TRAP
   5987   : NVPTXInst<(outs),
   5988               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5989                    Int16Regs:$r, Int16Regs:$g),
   5990               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5991               "\\{$r, $g\\};",
   5992               []>;
   5993 def SUST_P_3D_V2B16_TRAP
   5994   : NVPTXInst<(outs),
   5995               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5996                    Int16Regs:$r, Int16Regs:$g),
   5997               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5998               "\\{$r, $g\\};",
   5999               []>;
   6000 def SUST_P_3D_V2B32_TRAP
   6001   : NVPTXInst<(outs),
   6002               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6003                    Int32Regs:$r, Int32Regs:$g),
   6004               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   6005               "\\{$r, $g\\};",
   6006               []>;
        // Four-component (v4) forms.
   6007 def SUST_P_3D_V4B8_TRAP
   6008   : NVPTXInst<(outs),
   6009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6010                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6011          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   6012          "\\{$r, $g, $b, $a\\};",
   6013               []>;
   6014 def SUST_P_3D_V4B16_TRAP
   6015   : NVPTXInst<(outs),
   6016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6017                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6018         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   6019         "\\{$r, $g, $b, $a\\};",
   6020               []>;
   6021 def SUST_P_3D_V4B32_TRAP
   6022   : NVPTXInst<(outs),
   6023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6024                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6025         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   6026         "\\{$r, $g, $b, $a\\};",
   6027               []>;
   6028 }
   6029 
   6030 // Surface store instruction patterns
   6031 // I'm not sure why we can't just include these in the instruction definitions,
   6032 // but TableGen complains of type errors :(
        //
        // Each anonymous Pat below rewrites one int_nvvm_sust_b_1d_<ty>_clamp
        // intrinsic into the SUST_B_1D_<TY>_CLAMP instruction, forwarding the
        // operands unchanged and in the same order: $s (Int64Regs), the
        // coordinate $x (Int32Regs), then the stored components $r/$g/$b/$a.
        // Naming: intrinsic element type iN maps to instruction suffix BN, and
        // v2/v4 map to V2/V4. i8 and i16 components are both matched in
        // Int16Regs; i32 in Int32Regs; i64 in Int64Regs.
        // The SUST_B_1D_*_CLAMP instruction defs referenced here are not part
        // of this group of records; they are defined elsewhere in the file.
   6033 
   6034 // .clamp variant
        // Single-component 1d forms: i8, i16, i32, i64.
   6035 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
   6036            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6037           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6038 
   6039 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
   6040            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6041           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6042 
   6043 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
   6044            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6045           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6046 
   6047 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
   6048            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   6049           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   6050 
        // Two-component 1d forms: v2i8, v2i16, v2i32, v2i64.
   6051 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
   6052            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6053           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
   6054            Int16Regs:$r, Int16Regs:$g)>;
   6055 
   6056 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
   6057            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6058           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
   6059            Int16Regs:$r, Int16Regs:$g)>;
   6060 
   6061 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
   6062            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6063           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
   6064            Int32Regs:$r, Int32Regs:$g)>;
   6065 
   6066 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
   6067            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6068           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
   6069            Int64Regs:$r, Int64Regs:$g)>;
   6070 
        // Four-component 1d forms: v4i8, v4i16, v4i32 (no v4i64 pattern here).
   6071 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
   6072            Int64Regs:$s, Int32Regs:$x,
   6073            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6074           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
   6075            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6076 
   6077 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
   6078            Int64Regs:$s, Int32Regs:$x,
   6079            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6080           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
   6081            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6082 
   6083 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
   6084            Int64Regs:$s, Int32Regs:$x,
   6085            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6086           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
   6087            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6088 
   6089 
   6090 
   6091 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
   6092            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6093           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6094            Int16Regs:$r)>;
   6095 
   6096 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
   6097            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6098           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6099            Int16Regs:$r)>;
   6100 
   6101 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
   6102            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6103           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6104            Int32Regs:$r)>;
   6105 
   6106 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
   6107            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   6108           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6109            Int64Regs:$r)>;
   6110 
   6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
   6112           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6113           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6114            Int16Regs:$r, Int16Regs:$g)>;
   6115 
   6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
   6117           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6118           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6119            Int16Regs:$r, Int16Regs:$g)>;
   6120 
   6121 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
   6122           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6123           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6124            Int32Regs:$r, Int32Regs:$g)>;
   6125 
   6126 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
   6127           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6128           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6129            Int64Regs:$r, Int64Regs:$g)>;
   6130 
   6131 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
   6132            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6133            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6134           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6135            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6136 
   6137 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
   6138            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6139            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6140           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6141            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6142 
   6143 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
   6144            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6145            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6146           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6147            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6148 
   6149 
   6150 
   6151 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
   6152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6153           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6154            Int16Regs:$r)>;
   6155 
   6156 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
   6157            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6158           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6159            Int16Regs:$r)>;
   6160 
   6161 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
   6162            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6163           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6164            Int32Regs:$r)>;
   6165 
   6166 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
   6167            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6168           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6169            Int64Regs:$r)>;
   6170 
   6171 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
   6172           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6173           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6174            Int16Regs:$r, Int16Regs:$g)>;
   6175 
   6176 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
   6177           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6178           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6179            Int16Regs:$r, Int16Regs:$g)>;
   6180 
   6181 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
   6182           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6183           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6184            Int32Regs:$r, Int32Regs:$g)>;
   6185 
   6186 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
   6187           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   6188           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6189            Int64Regs:$r, Int64Regs:$g)>;
   6190 
   6191 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
   6192            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6193            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6194           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6195            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6196 
   6197 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
   6198            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6199            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6200           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6201            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6202 
   6203 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
   6204            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6205            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6206           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6207            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6208 
   6209 
   6210 
   6211 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
   6212           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6213           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
   6214            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6215            Int16Regs:$r)>;
   6216 
   6217 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
   6218           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6219           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
   6220            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6221            Int16Regs:$r)>;
   6222 
   6223 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
   6224           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6225           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
   6226            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6227            Int32Regs:$r)>;
   6228 
   6229 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
   6230           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6231           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
   6232            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6233            Int64Regs:$r)>;
   6234 
   6235 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
   6236            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6237            Int16Regs:$r, Int16Regs:$g),
   6238           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
   6239            Int32Regs:$x, Int32Regs:$y,
   6240            Int16Regs:$r, Int16Regs:$g)>;
   6241 
   6242 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
   6243            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6244            Int16Regs:$r, Int16Regs:$g),
   6245           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
   6246            Int32Regs:$x, Int32Regs:$y,
   6247            Int16Regs:$r, Int16Regs:$g)>;
   6248 
   6249 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
   6250            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6251            Int32Regs:$g),
   6252           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
   6253            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6254 
   6255 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
   6256            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   6257            Int64Regs:$g),
   6258           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
   6259            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   6260 
   6261 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
   6262            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6263            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6264           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
   6265            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6266            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6267 
   6268 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
   6269            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6270            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6271           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
   6272            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6273            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6274 
   6275 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
   6276            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6277            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6278           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
   6279            Int32Regs:$x, Int32Regs:$y,
   6280            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6281 
   6282 
   6283 
   6284 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
   6285            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6286            Int16Regs:$r),
   6287           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
   6288            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6289            Int16Regs:$r)>;
   6290 
   6291 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
   6292            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6293            Int16Regs:$r),
   6294           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
   6295            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6296            Int16Regs:$r)>;
   6297 
   6298 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
   6299            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6300            Int32Regs:$r),
   6301           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
   6302            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6303            Int32Regs:$r)>;
   6304 
   6305 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
   6306            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6307            Int64Regs:$r),
   6308           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
   6309            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6310            Int64Regs:$r)>;
   6311 
   6312 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
   6313            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6314            Int16Regs:$r, Int16Regs:$g),
   6315           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
   6316            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6317            Int16Regs:$r, Int16Regs:$g)>;
   6318 
   6319 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
   6320            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6321            Int16Regs:$r, Int16Regs:$g),
   6322           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
   6323            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6324            Int16Regs:$r, Int16Regs:$g)>;
   6325 
   6326 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
   6327            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6328            Int32Regs:$r, Int32Regs:$g),
   6329           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
   6330            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6331            Int32Regs:$r, Int32Regs:$g)>;
   6332 
   6333 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
   6334            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6335            Int64Regs:$r, Int64Regs:$g),
   6336           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
   6337            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6338            Int64Regs:$r, Int64Regs:$g)>;
   6339 
   6340 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
   6341            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6342            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6343           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
   6344            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6345            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6346 
   6347 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
   6348            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6349            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6350           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
   6351            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6352            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6353 
   6354 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
   6355            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6356            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6357           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
   6358            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6359            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6360 
   6361 
   6362 // .trap variant
   6363 def : Pat<(int_nvvm_sust_b_1d_i8_trap
   6364            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6365           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6366 
   6367 def : Pat<(int_nvvm_sust_b_1d_i16_trap
   6368            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6369           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6370 
   6371 def : Pat<(int_nvvm_sust_b_1d_i32_trap
   6372            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6373           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6374 
   6375 def : Pat<(int_nvvm_sust_b_1d_i64_trap
   6376            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   6377           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   6378 
   6379 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
   6380            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6381           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6382            Int16Regs:$r, Int16Regs:$g)>;
   6383 
   6384 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
   6385            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6386           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6387            Int16Regs:$r, Int16Regs:$g)>;
   6388 
   6389 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
   6390            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6391           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6392            Int32Regs:$r, Int32Regs:$g)>;
   6393 
   6394 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
   6395            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6396           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
   6397            Int64Regs:$r, Int64Regs:$g)>;
   6398 
   6399 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
   6400            Int64Regs:$s, Int32Regs:$x,
   6401            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6402           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6403            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6404 
   6405 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
   6406            Int64Regs:$s, Int32Regs:$x,
   6407            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6408           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6409            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6410 
   6411 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
   6412            Int64Regs:$s, Int32Regs:$x,
   6413            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6414           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6415            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6416 
   6417 
   6418 
   6419 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
   6420            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6421           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6422            Int16Regs:$r)>;
   6423 
   6424 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
   6425            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6426           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6427            Int16Regs:$r)>;
   6428 
   6429 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
   6430            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6431           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6432            Int32Regs:$r)>;
   6433 
   6434 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
   6435            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   6436           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6437            Int64Regs:$r)>;
   6438 
   6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
   6440           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6441           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6442            Int16Regs:$r, Int16Regs:$g)>;
   6443 
   6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
   6445           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6446           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6447            Int16Regs:$r, Int16Regs:$g)>;
   6448 
   6449 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
   6450           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6451           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6452            Int32Regs:$r, Int32Regs:$g)>;
   6453 
   6454 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
   6455           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6456           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6457            Int64Regs:$r, Int64Regs:$g)>;
   6458 
   6459 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
   6460            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6461            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6462           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6463            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6464 
   6465 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
   6466            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6467            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6468           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6469            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6470 
   6471 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
   6472            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6473            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6474           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6475            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6476 
   6477 
   6478 
   6479 def : Pat<(int_nvvm_sust_b_2d_i8_trap
   6480            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6481           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6482            Int16Regs:$r)>;
   6483 
   6484 def : Pat<(int_nvvm_sust_b_2d_i16_trap
   6485            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6486           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6487            Int16Regs:$r)>;
   6488 
   6489 def : Pat<(int_nvvm_sust_b_2d_i32_trap
   6490            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6491           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6492            Int32Regs:$r)>;
   6493 
   6494 def : Pat<(int_nvvm_sust_b_2d_i64_trap
   6495            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6496           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6497            Int64Regs:$r)>;
   6498 
   6499 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
   6500           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6501           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6502            Int16Regs:$r, Int16Regs:$g)>;
   6503 
   6504 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
   6505           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6506           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6507            Int16Regs:$r, Int16Regs:$g)>;
   6508 
   6509 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
   6510           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6511           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6512            Int32Regs:$r, Int32Regs:$g)>;
   6513 
   6514 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
   6515           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   6516           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6517            Int64Regs:$r, Int64Regs:$g)>;
   6518 
   6519 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
   6520            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6521            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6522           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6523            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6524 
   6525 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
   6526            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6527            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6528           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6529            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6530 
   6531 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
   6532            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6533            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6534           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6535            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6536 
   6537 
   6538 
   6539 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
   6540           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6541           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
   6542            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6543            Int16Regs:$r)>;
   6544 
   6545 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
   6546           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6547           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
   6548            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6549            Int16Regs:$r)>;
   6550 
   6551 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
   6552           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6553           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
   6554            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6555            Int32Regs:$r)>;
   6556 
   6557 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
   6558           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6559           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
   6560            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6561            Int64Regs:$r)>;
   6562 
   6563 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
   6564            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6565            Int16Regs:$r, Int16Regs:$g),
   6566           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
   6567            Int32Regs:$x, Int32Regs:$y,
   6568            Int16Regs:$r, Int16Regs:$g)>;
   6569 
   6570 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
   6571            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6572            Int16Regs:$r, Int16Regs:$g),
   6573           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
   6574            Int32Regs:$x, Int32Regs:$y,
   6575            Int16Regs:$r, Int16Regs:$g)>;
   6576 
   6577 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
   6578            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6579            Int32Regs:$g),
   6580           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6581            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6582 
   6583 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
   6584            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   6585            Int64Regs:$g),
   6586           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
   6587            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   6588 
   6589 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
   6590            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6591            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6592           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
   6593            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6594            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6595 
   6596 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
   6597            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6598            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6599           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
   6600            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6601            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6602 
   6603 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
   6604            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6605            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6606           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6607            Int32Regs:$x, Int32Regs:$y,
   6608            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6609 
   6610 
   6611 
   6612 def : Pat<(int_nvvm_sust_b_3d_i8_trap
   6613            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6614            Int16Regs:$r),
   6615           (SUST_B_3D_B8_TRAP Int64Regs:$s,
   6616            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6617            Int16Regs:$r)>;
   6618 
   6619 def : Pat<(int_nvvm_sust_b_3d_i16_trap
   6620            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6621            Int16Regs:$r),
   6622           (SUST_B_3D_B16_TRAP Int64Regs:$s,
   6623            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6624            Int16Regs:$r)>;
   6625 
   6626 def : Pat<(int_nvvm_sust_b_3d_i32_trap
   6627            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6628            Int32Regs:$r),
   6629           (SUST_B_3D_B32_TRAP Int64Regs:$s,
   6630            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6631            Int32Regs:$r)>;
   6632 
   // Selection patterns for the i64 and vector (v2/v4) forms of the
   // int_nvvm_sust_b_3d_*_trap intrinsics. Each pattern selects the
   // like-named SUST_B_3D_*_TRAP instruction and forwards the operands
   // ($s, $x, $y, $z, then the data registers) in the same order.
   // Note that 8-bit and 16-bit element forms both take data in Int16Regs.
   // NOTE(review): $s is presumably the surface handle and $x/$y/$z the
   // coordinates -- confirm against the SUST_B_3D_* instruction definitions.
   6633 def : Pat<(int_nvvm_sust_b_3d_i64_trap
   6634            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6635            Int64Regs:$r),
   6636           (SUST_B_3D_B64_TRAP Int64Regs:$s,
   6637            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6638            Int64Regs:$r)>;
   6639 
   // v2 forms: two data operands ($r, $g).
   6640 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
   6641            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6642            Int16Regs:$r, Int16Regs:$g),
   6643           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
   6644            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6645            Int16Regs:$r, Int16Regs:$g)>;
   6646 
   6647 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
   6648            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6649            Int16Regs:$r, Int16Regs:$g),
   6650           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
   6651            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6652            Int16Regs:$r, Int16Regs:$g)>;
   6653 
   6654 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
   6655            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6656            Int32Regs:$r, Int32Regs:$g),
   6657           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
   6658            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6659            Int32Regs:$r, Int32Regs:$g)>;
   6660 
   6661 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
   6662            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6663            Int64Regs:$r, Int64Regs:$g),
   6664           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
   6665            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6666            Int64Regs:$r, Int64Regs:$g)>;
   6667 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6668 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
   6669            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6670            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6671           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
   6672            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6673            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6674 
   6675 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
   6676            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6677            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6678           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
   6679            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6680            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6681 
   6682 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
   6683            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6684            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6685           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
   6686            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6687            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6688 
   6689 
   6690 // .zero variant: int_nvvm_sust_b_*_zero -> SUST_B_*_ZERO
   // The patterns below cover the 1D forms. Each one selects the like-named
   // SUST_B_1D_*_ZERO instruction and forwards ($s, $x, data...) unchanged.
   // 8-bit and 16-bit element forms both take their data in Int16Regs.
   // NOTE(review): ".zero" presumably refers to the PTX sust out-of-bounds
   // mode of the same name -- confirm against the PTX ISA before relying on it.
   6691 def : Pat<(int_nvvm_sust_b_1d_i8_zero
   6692            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6693           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6694 
   6695 def : Pat<(int_nvvm_sust_b_1d_i16_zero
   6696            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6697           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6698 
   6699 def : Pat<(int_nvvm_sust_b_1d_i32_zero
   6700            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6701           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6702 
   6703 def : Pat<(int_nvvm_sust_b_1d_i64_zero
   6704            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   6705           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   6706 
   // v2 forms: two data operands ($r, $g).
   6707 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
   6708            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6709           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
   6710            Int16Regs:$r, Int16Regs:$g)>;
   6711 
   6712 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
   6713            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6714           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
   6715            Int16Regs:$r, Int16Regs:$g)>;
   6716 
   6717 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
   6718            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6719           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
   6720            Int32Regs:$r, Int32Regs:$g)>;
   6721 
   6722 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
   6723            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6724           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
   6725            Int64Regs:$r, Int64Regs:$g)>;
   6726 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6727 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
   6728            Int64Regs:$s, Int32Regs:$x,
   6729            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6730           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
   6731            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6732 
   6733 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
   6734            Int64Regs:$s, Int32Regs:$x,
   6735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6736           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
   6737            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6738 
   6739 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
   6740            Int64Regs:$s, Int32Regs:$x,
   6741            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6742           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
   6743            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6744 
   6745 
   6746 
   // Selection patterns for int_nvvm_sust_b_1d_array_*_zero. Each pattern
   // selects the like-named SUST_B_1D_ARRAY_*_ZERO instruction and forwards
   // ($s, $l, $x, data...) in the same order. Compared with the non-array 1D
   // patterns, these carry one extra Int32Regs operand, $l, ahead of $x.
   // NOTE(review): $l is presumably the array layer index -- confirm.
   6747 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
   6748            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6749           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6750            Int16Regs:$r)>;
   6751 
   6752 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
   6753            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6754           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6755            Int16Regs:$r)>;
   6756 
   6757 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
   6758            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6759           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6760            Int32Regs:$r)>;
   6761 
   6762 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
   6763            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   6764           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6765            Int64Regs:$r)>;
   6766 
   // v2 forms: two data operands ($r, $g).
   6767 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
   6768           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6769           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6770            Int16Regs:$r, Int16Regs:$g)>;
   6771 
   6772 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
   6773           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6774           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6775            Int16Regs:$r, Int16Regs:$g)>;
   6776 
   6777 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
   6778           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6779           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6780            Int32Regs:$r, Int32Regs:$g)>;
   6781 
   6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
   6783           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6784           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6785            Int64Regs:$r, Int64Regs:$g)>;
   6786 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6787 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
   6788            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6789            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6790           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6791            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6792 
   6793 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
   6794            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6795            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6796           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6798 
   6799 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
   6800            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6801            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6802           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6803            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6804 
   6805 
   6806 
   // Selection patterns for int_nvvm_sust_b_2d_*_zero. Each pattern selects
   // the like-named SUST_B_2D_*_ZERO instruction and forwards
   // ($s, $x, $y, data...) in the same order.
   // 8-bit and 16-bit element forms both take their data in Int16Regs.
   6807 def : Pat<(int_nvvm_sust_b_2d_i8_zero
   6808            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6809           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6810            Int16Regs:$r)>;
   6811 
   6812 def : Pat<(int_nvvm_sust_b_2d_i16_zero
   6813            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6814           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6815            Int16Regs:$r)>;
   6816 
   6817 def : Pat<(int_nvvm_sust_b_2d_i32_zero
   6818            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6819           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6820            Int32Regs:$r)>;
   6821 
   6822 def : Pat<(int_nvvm_sust_b_2d_i64_zero
   6823            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6824           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6825            Int64Regs:$r)>;
   6826 
   // v2 forms: two data operands ($r, $g).
   6827 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
   6828           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6829           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6830            Int16Regs:$r, Int16Regs:$g)>;
   6831 
   6832 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
   6833           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6834           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6835            Int16Regs:$r, Int16Regs:$g)>;
   6836 
   6837 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
   6838           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6839           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6840            Int32Regs:$r, Int32Regs:$g)>;
   6841 
   6842 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
   6843           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   6844           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6845            Int64Regs:$r, Int64Regs:$g)>;
   6846 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6847 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
   6848            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6849            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6850           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6851            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6852 
   6853 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
   6854            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6855            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6856           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6857            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6858 
   6859 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
   6860            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6861            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6862           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6863            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6864 
   6865 
   6866 
   // Selection patterns for int_nvvm_sust_b_2d_array_*_zero. Each pattern
   // selects the like-named SUST_B_2D_ARRAY_*_ZERO instruction and forwards
   // ($s, $l, $x, $y, data...) in the same order. Compared with the
   // non-array 2D patterns, these carry one extra Int32Regs operand, $l,
   // ahead of $x.
   // NOTE(review): $l is presumably the array layer index -- confirm.
   6867 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
   6868           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6869           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
   6870            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6871            Int16Regs:$r)>;
   6872 
   6873 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
   6874           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6875           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
   6876            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6877            Int16Regs:$r)>;
   6878 
   6879 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
   6880           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6881           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
   6882            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6883            Int32Regs:$r)>;
   6884 
   6885 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
   6886           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6887           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
   6888            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6889            Int64Regs:$r)>;
   6890 
   // v2 forms: two data operands ($r, $g).
   6891 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
   6892            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6893            Int16Regs:$r, Int16Regs:$g),
   6894           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
   6895            Int32Regs:$x, Int32Regs:$y,
   6896            Int16Regs:$r, Int16Regs:$g)>;
   6897 
   6898 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
   6899            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6900            Int16Regs:$r, Int16Regs:$g),
   6901           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
   6902            Int32Regs:$x, Int32Regs:$y,
   6903            Int16Regs:$r, Int16Regs:$g)>;
   6904 
   6905 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
   6906            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6907            Int32Regs:$g),
   6908           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
   6909            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6910 
   6911 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
   6912            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   6913            Int64Regs:$g),
   6914           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
   6915            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   6916 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6917 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
   6918            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6919            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6920           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
   6921            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6922            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6923 
   6924 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
   6925            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6926            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6927           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
   6928            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6929            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6930 
   6931 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
   6932            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6933            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6934           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
   6935            Int32Regs:$x, Int32Regs:$y,
   6936            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6937 
   6938 
   6939 
   // Selection patterns for int_nvvm_sust_b_3d_*_zero. Each pattern selects
   // the like-named SUST_B_3D_*_ZERO instruction and forwards
   // ($s, $x, $y, $z, data...) in the same order.
   // 8-bit and 16-bit element forms both take their data in Int16Regs.
   6940 def : Pat<(int_nvvm_sust_b_3d_i8_zero
   6941            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6942            Int16Regs:$r),
   6943           (SUST_B_3D_B8_ZERO Int64Regs:$s,
   6944            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6945            Int16Regs:$r)>;
   6946 
   6947 def : Pat<(int_nvvm_sust_b_3d_i16_zero
   6948            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6949            Int16Regs:$r),
   6950           (SUST_B_3D_B16_ZERO Int64Regs:$s,
   6951            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6952            Int16Regs:$r)>;
   6953 
   6954 def : Pat<(int_nvvm_sust_b_3d_i32_zero
   6955            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6956            Int32Regs:$r),
   6957           (SUST_B_3D_B32_ZERO Int64Regs:$s,
   6958            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6959            Int32Regs:$r)>;
   6960 
   6961 def : Pat<(int_nvvm_sust_b_3d_i64_zero
   6962            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6963            Int64Regs:$r),
   6964           (SUST_B_3D_B64_ZERO Int64Regs:$s,
   6965            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6966            Int64Regs:$r)>;
   6967 
   // v2 forms: two data operands ($r, $g).
   6968 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
   6969            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6970            Int16Regs:$r, Int16Regs:$g),
   6971           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
   6972            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6973            Int16Regs:$r, Int16Regs:$g)>;
   6974 
   6975 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
   6976            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6977            Int16Regs:$r, Int16Regs:$g),
   6978           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
   6979            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6980            Int16Regs:$r, Int16Regs:$g)>;
   6981 
   6982 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
   6983            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6984            Int32Regs:$r, Int32Regs:$g),
   6985           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
   6986            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6987            Int32Regs:$r, Int32Regs:$g)>;
   6988 
   6989 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
   6990            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6991            Int64Regs:$r, Int64Regs:$g),
   6992           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
   6993            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6994            Int64Regs:$r, Int64Regs:$g)>;
   6995 
   // v4 forms: four data operands ($r, $g, $b, $a).
   6996 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
   6997            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6998            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6999           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
   7000            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7001            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7002 
   7003 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
   7004            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7005            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7006           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
   7007            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7008            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7009 
   7010 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
   7011            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7012            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7013           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
   7014            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7015            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7016 
   7017 
   7018 
   7019 
   // Selection patterns for int_nvvm_sust_p_1d_*_trap. Each pattern selects
   // the like-named SUST_P_1D_*_TRAP instruction and forwards
   // ($s, $x, data...) in the same order.
   // Only 8-, 16- and 32-bit element forms appear in this group; there is no
   // i64 or v2i64 pattern here, unlike the sust_b groups.
   // 8-bit and 16-bit element forms both take their data in Int16Regs.
   7020 def : Pat<(int_nvvm_sust_p_1d_i8_trap
   7021            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   7022           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   7023 
   7024 def : Pat<(int_nvvm_sust_p_1d_i16_trap
   7025            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   7026           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   7027 
   7028 def : Pat<(int_nvvm_sust_p_1d_i32_trap
   7029            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   7030           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   7031 
   // v2 forms: two data operands ($r, $g).
   7032 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
   7033            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   7034           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
   7035            Int16Regs:$r, Int16Regs:$g)>;
   7036 
   7037 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
   7038            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   7039           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
   7040            Int16Regs:$r, Int16Regs:$g)>;
   7041 
   7042 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
   7043            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   7044           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
   7045            Int32Regs:$r, Int32Regs:$g)>;
   7046 
   // v4 forms: four data operands ($r, $g, $b, $a).
   7047 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
   7048            Int64Regs:$s, Int32Regs:$x,
   7049            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7050           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
   7051            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7052 
   7053 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
   7054            Int64Regs:$s, Int32Regs:$x,
   7055            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7056           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
   7057            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7058 
   7059 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
   7060            Int64Regs:$s, Int32Regs:$x,
   7061            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7062           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
   7063            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7064 
   7065 
   7066 
   // Selection patterns for int_nvvm_sust_p_1d_array_*_trap. Each pattern
   // selects the like-named SUST_P_1D_ARRAY_*_TRAP instruction and forwards
   // ($s, $l, $x, data...) in the same order. The extra Int32Regs operand $l
   // precedes $x; no 64-bit element forms appear in this group.
   // NOTE(review): $l is presumably the array layer index -- confirm.
   7067 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
   7068            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   7069           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7070            Int16Regs:$r)>;
   7071 
   7072 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
   7073            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   7074           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7075            Int16Regs:$r)>;
   7076 
   7077 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
   7078            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   7079           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7080            Int32Regs:$r)>;
   7081 
   // v2 forms: two data operands ($r, $g).
   7082 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
   7083           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   7084           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7085            Int16Regs:$r, Int16Regs:$g)>;
   7086 
   7087 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
   7088           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   7089           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7090            Int16Regs:$r, Int16Regs:$g)>;
   7091 
   7092 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
   7093           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   7094           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7095            Int32Regs:$r, Int32Regs:$g)>;
   7096 
   // v4 forms: four data operands ($r, $g, $b, $a).
   7097 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
   7098            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7099            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7100           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7101            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7102 
   7103 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
   7104            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7105            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7106           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7107            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7108 
   7109 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
   7110            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7111            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7112           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   7113            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7114 
   7115 
   7116 
   // Selection patterns for int_nvvm_sust_p_2d_*_trap. Each pattern selects
   // the like-named SUST_P_2D_*_TRAP instruction and forwards
   // ($s, $x, $y, data...) in the same order.
   // No 64-bit element forms appear in this group; 8-bit and 16-bit element
   // forms both take their data in Int16Regs.
   7117 def : Pat<(int_nvvm_sust_p_2d_i8_trap
   7118            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   7119           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7120            Int16Regs:$r)>;
   7121 
   7122 def : Pat<(int_nvvm_sust_p_2d_i16_trap
   7123            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   7124           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7125            Int16Regs:$r)>;
   7126 
   7127 def : Pat<(int_nvvm_sust_p_2d_i32_trap
   7128            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   7129           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7130            Int32Regs:$r)>;
   7131 
   // v2 forms: two data operands ($r, $g).
   7132 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
   7133           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   7134           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7135            Int16Regs:$r, Int16Regs:$g)>;
   7136 
   7137 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
   7138           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   7139           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7140            Int16Regs:$r, Int16Regs:$g)>;
   7141 
   7142 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
   7143           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   7144           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7145            Int32Regs:$r, Int32Regs:$g)>;
   7146 
   // v4 forms: four data operands ($r, $g, $b, $a).
   7147 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
   7148            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7149            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7150           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7151            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7152 
   7153 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
   7154            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7155            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7156           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7157            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7158 
   7159 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
   7160            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7161            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7162           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   7163            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7164 
   7165 
   7166 
   // Selection patterns for int_nvvm_sust_p_2d_array_*_trap. Each pattern
   // selects the like-named SUST_P_2D_ARRAY_*_TRAP instruction and forwards
   // ($s, $l, $x, $y, data...) in the same order. The extra Int32Regs
   // operand $l precedes $x; no 64-bit element forms appear in this group.
   // NOTE(review): $l is presumably the array layer index -- confirm.
   7167 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
   7168           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   7169           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
   7170            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7171            Int16Regs:$r)>;
   7172 
   7173 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
   7174           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   7175           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
   7176            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7177            Int16Regs:$r)>;
   7178 
   7179 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
   7180           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   7181           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
   7182            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7183            Int32Regs:$r)>;
   7184 
   // v2 forms: two data operands ($r, $g).
   7185 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
   7186            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7187            Int16Regs:$r, Int16Regs:$g),
   7188           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
   7189            Int32Regs:$x, Int32Regs:$y,
   7190            Int16Regs:$r, Int16Regs:$g)>;
   7191 
   7192 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
   7193            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7194            Int16Regs:$r, Int16Regs:$g),
   7195           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
   7196            Int32Regs:$x, Int32Regs:$y,
   7197            Int16Regs:$r, Int16Regs:$g)>;
   7198 
   7199 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
   7200            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   7201            Int32Regs:$g),
   7202           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
   7203            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   7204 
   // v4 forms: four data operands ($r, $g, $b, $a).
   7205 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
   7206            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7207            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7208           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
   7209            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7210            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7211 
   7212 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
   7213            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7214            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7215           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
   7216            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7217            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7218 
   7219 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
   7220            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   7221            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7222           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
   7223            Int32Regs:$x, Int32Regs:$y,
   7224            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7225 
   7226 
   7227 
   // Selection patterns for int_nvvm_sust_p_3d_*_trap. Each pattern selects
   // the like-named SUST_P_3D_*_TRAP instruction and forwards
   // ($s, $x, $y, $z, data...) in the same order.
   // No 64-bit element forms appear in this group; 8-bit and 16-bit element
   // forms both take their data in Int16Regs.
   7228 def : Pat<(int_nvvm_sust_p_3d_i8_trap
   7229            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7230            Int16Regs:$r),
   7231           (SUST_P_3D_B8_TRAP Int64Regs:$s,
   7232            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7233            Int16Regs:$r)>;
   7234 
   7235 def : Pat<(int_nvvm_sust_p_3d_i16_trap
   7236            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7237            Int16Regs:$r),
   7238           (SUST_P_3D_B16_TRAP Int64Regs:$s,
   7239            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7240            Int16Regs:$r)>;
   7241 
   7242 def : Pat<(int_nvvm_sust_p_3d_i32_trap
   7243            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7244            Int32Regs:$r),
   7245           (SUST_P_3D_B32_TRAP Int64Regs:$s,
   7246            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7247            Int32Regs:$r)>;
   7248 
   // v2 forms: two data operands ($r, $g).
   7249 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
   7250            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7251            Int16Regs:$r, Int16Regs:$g),
   7252           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
   7253            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7254            Int16Regs:$r, Int16Regs:$g)>;
   7255 
   7256 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
   7257            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7258            Int16Regs:$r, Int16Regs:$g),
   7259           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
   7260            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7261            Int16Regs:$r, Int16Regs:$g)>;
   7262 
   7263 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
   7264            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7265            Int32Regs:$r, Int32Regs:$g),
   7266           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
   7267            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7268            Int32Regs:$r, Int32Regs:$g)>;
   7269 
   // v4 forms: four data operands ($r, $g, $b, $a).
   7270 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
   7271            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7272            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7273           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
   7274            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7275            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7276 
   7277 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
   7278            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7279            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   7280           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
   7281            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7282            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   7283 
   7284 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
   7285            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7286            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   7287           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
   7288            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   7289            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   7290 
   7291 //-----------------------------------
   7292 // Read Special Registers
   7293 //-----------------------------------
   7294 
        // Instruction that copies a special register into a 64-bit register.
        //   regname - register name without the leading '%'; the class adds it.
        //   intop   - zero-argument intrinsic this instruction is selected for.
        // Emits "mov.u64 $d, %<regname>;".
   7295 class PTX_READ_SREG_R64<string regname, Intrinsic intop>
   7296   : NVPTXInst<(outs Int64Regs:$d), (ins),
   7297               !strconcat("mov.u64 \t$d, %", regname, ";"),
   7298               [(set Int64Regs:$d, (intop))]>;
   7299 
        // Instruction that copies a special register into a 32-bit register.
        //   regname - register name without the leading '%'; the class adds it.
        //   intop   - zero-argument intrinsic this instruction is selected for.
        // Emits "mov.u32 $d, %<regname>;".
   7300 class PTX_READ_SREG_R32<string regname, Intrinsic intop>
   7301   : NVPTXInst<(outs Int32Regs:$d), (ins),
   7302               !strconcat("mov.u32 \t$d, %", regname, ";"),
   7303               [(set Int32Regs:$d, (intop))]>;
   7304 
        // Scalar special-register reads. Every def below is a 32-bit read built
        // from PTX_READ_SREG_R32, except INT_PTX_SREG_CLOCK64, which is a 64-bit
        // read built from PTX_READ_SREG_R64. The string argument is the register
        // name as it appears after '%' in the emitted mov.
   7305 // TODO: Add vector versions of the special-register reads.
   7306 
   7307 def INT_PTX_SREG_TID_X :
   7308     PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
   7309 def INT_PTX_SREG_TID_Y :
   7310     PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
   7311 def INT_PTX_SREG_TID_Z :
   7312     PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
   7313 def INT_PTX_SREG_TID_W :
   7314     PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
   7315 
   7316 def INT_PTX_SREG_NTID_X :
   7317     PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
   7318 def INT_PTX_SREG_NTID_Y :
   7319     PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
   7320 def INT_PTX_SREG_NTID_Z :
   7321     PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
   7322 def INT_PTX_SREG_NTID_W :
   7323     PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
   7324 
   7325 def INT_PTX_SREG_LANEID :
   7326     PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
   7327 def INT_PTX_SREG_WARPID :
   7328     PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
   7329 def INT_PTX_SREG_NWARPID :
   7330     PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
   7331 
   7332 def INT_PTX_SREG_CTAID_X :
   7333     PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
   7334 def INT_PTX_SREG_CTAID_Y :
   7335     PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
   7336 def INT_PTX_SREG_CTAID_Z :
   7337     PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
   7338 def INT_PTX_SREG_CTAID_W :
   7339     PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
   7340 
   7341 def INT_PTX_SREG_NCTAID_X :
   7342     PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
   7343 def INT_PTX_SREG_NCTAID_Y :
   7344     PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
   7345 def INT_PTX_SREG_NCTAID_Z :
   7346     PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
   7347 def INT_PTX_SREG_NCTAID_W :
   7348     PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
   7349 
   7350 def INT_PTX_SREG_SMID :
   7351     PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
   7352 def INT_PTX_SREG_NSMID :
   7353     PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
   7354 def INT_PTX_SREG_GRIDID :
   7355     PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
   7356 
   7357 def INT_PTX_SREG_LANEMASK_EQ :
   7358     PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
   7359 def INT_PTX_SREG_LANEMASK_LE :
   7360     PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
   7361 def INT_PTX_SREG_LANEMASK_LT :
   7362     PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
   7363 def INT_PTX_SREG_LANEMASK_GE :
   7364     PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
   7365 def INT_PTX_SREG_LANEMASK_GT :
   7366     PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
   7367 
   7368 def INT_PTX_SREG_CLOCK :
   7369     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
   7370 def INT_PTX_SREG_CLOCK64 :
   7371     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
   7372 
   7373 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
   7374 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
   7375 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
   7376 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
   7377 
   7378 // TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
   7379 // prefixes the register name with '%', and WARP_SZ is emitted without one.
        // Selected for int_nvvm_read_ptx_sreg_warpsize; emits "mov.u32 $dst, WARP_SZ;".
   7380 def INT_PTX_SREG_WARPSIZE :
   7381     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
   7382               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
   7383 
   7384 //
   7385 // wmma.load.[a|b|c].sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
   7386 //
   7387 
        // Placeholder instruction with empty operand lists, a dummy "?" asm string
        // and no pattern. The WMMA instruction classes in this file derive from it
        // and fill in OutOperandList, InOperandList, AsmString and Pattern with
        // `let`.
   7388 class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
   7389 
        // One wmma.load instruction variant.
        // The class-name suffix appears to spell out the parameters: G(eometry),
        // A(bc), L(ayout), S(pace), T(ype), O(perand), S(tride).
        //   Geometry   - shape string such as "m16n16k16". "m16n16k16" requires
        //                hasPTX60, any other value requires hasPTX61; hasSM70 is
        //                always required.
        //   Abc        - which fragment is loaded: "a", "b" or "c".
        //   Layout     - "row" or "col".
        //   Space      - "", ".global" or ".shared"; appended verbatim to the opcode.
        //   Type       - element type string, "f16" or "f32".
        //   regclass   - register class of every result register.
        //   SrcOp      - operand kind used for the source address ($src).
        //   WithStride - when set, adds a trailing Int32Regs:$ldm operand.
        // The "c"/"f16" combination produces four result registers; every other
        // combination produces eight.
   7390 class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
   7391                         string Space, string Type, NVPTXRegClass regclass,
   7392                         DAGOperand SrcOp, bit WithStride>
   7393   : EmptyNVPTXInst,
   7394     Requires<[!if(!eq(Geometry, "m16n16k16"),
   7395                   hasPTX60,
   7396                   hasPTX61),
   7397               hasSM70]> {
   7398   // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
   7399   // for this function.
          // The name is "INT_WMMA_<Geometry>_load_<a|b|c_<Type>>_<Layout><Space with
          // '.' replaced by '_'>[_stride]_Intr"; only the "c" fragment carries the
          // type in its name.
   7400   PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
   7401                                        # Geometry # "_load_"
   7402                                        # !subst("c", "c_" # Type, Abc)
   7403                                        # "_" # Layout
   7404                                        # !subst(".", "_", Space)
   7405                                        # !if(WithStride,"_stride", "")
   7406                                        # "_Intr");
   7407   dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
   7408   dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
   7409   dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
   7410 
   7411   dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
   7412   dag Ins = !con((ins SrcOp:$src), StrideArg);
   7413 
   7414   // Build a dag pattern that matches the intrinsic call.
   7415   // We want a dag that looks like this:
   7416   // (set <output args>, (intrinsic <input arguments>)) where input and
   7417   // output arguments are named patterns that would match corresponding
   7418   // input/output arguments of the instruction.
   7419   //
   7420   // First we construct (set <output arguments>) from instruction's outs dag by
   7421   // replacing dag operator 'outs' with 'set'.
   7422   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
   7423   // Similarly, construct (intrinsic <input arguments>) sub-dag from
   7424   // instruction's input arguments, only now we also need to replace operands
   7425   // with patterns that would match them and the operator 'ins' with the
   7426   // intrinsic.
   7427   dag PatArgs = !foreach(tmp, Ins,
   7428                               !subst(imem, ADDRvar,
   7429                               !subst(MEMri64, ADDRri64,
   7430                               !subst(MEMri, ADDRri,
   7431                               !subst(ins, IntrMatcher, tmp)))));
   7432   // Finally, concatenate both parts together. !con() requires both dags to have
   7433   // the same operator, so we wrap PatArgs in a (set ...) dag.
   7434   let Pattern = [!con(PatOuts, (set PatArgs))];
   7435   let OutOperandList = Outs;
   7436   let InOperandList = Ins;
   7437   let AsmString = "wmma.load."
   7438                   # Abc
   7439                   # ".sync"
   7440                   # "." # Layout
   7441                   # "." # Geometry
   7442                   # Space
   7443                   # "." # Type # " \t"
   7444                   # !if(!eq(Abc#Type, "cf16"),
   7445                         "{{$r0, $r1, $r2, $r3}}",
   7446                         "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
   7447                   # ", [$src]"
   7448                   # !if(WithStride, ", $ldm", "")
   7449                   # ";";
   7450 }
   7451 
        // PatFrag that matches one wmma.load intrinsic restricted to one address
        // space. WMMA_LOAD_GALSTSh instantiates it with the "_Intr" suffix, which is
        // the name WMMA_LOAD_GALSTOS looks up with !cast.
        //   Space      - ".shared" and ".global" select the shared/global check;
        //                any other value selects the generic check.
        //   WithStride - adds the $ldm operand and the "_stride" intrinsic suffix.
   7452 class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
   7453                             string Space, string Type, bit WithStride>
   7454                            : PatFrag <(ops),(ops)> {
   7455   // Intrinsic that matches this instruction.
   7456   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
   7457                                     # "_" # Geometry # "_load_"
   7458                                     # Abc # "_" # Type # "_" # Layout
   7459                                     # !if(WithStride,"_stride", ""));
          // Candidate predicate bodies; PredicateCode below picks one by Space.
   7460   code match_generic = [{
   7461    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
   7462   }];
   7463   code match_shared = [{
   7464    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
   7465   }];
   7466   code match_global = [{
   7467    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
   7468   }];
   7469 
          // The fragment is the operand list with 'ops' replaced by the intrinsic.
   7470   let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
   7471   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
   7472   let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
   7473                       !if(!eq(Space, ".global"), match_global, match_generic));
   7474 }
   7475 
        // Instantiates WMMA_LOAD_GALSTOS once per source-address operand kind:
        // imem (_avar), Int32Regs (_areg), Int64Regs (_areg64), MEMri (_ari) and
        // MEMri64 (_ari64). All other parameters are passed through unchanged.
   7476 multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
   7477                             string Space, string Type, NVPTXRegClass regclass,
   7478                             bit WithStride> {
   7479   def _avar:  WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
   7480                                 imem, WithStride>;
   7481   def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
   7482                                 Int32Regs, WithStride>;
   7483   def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
   7484                                 Int64Regs, WithStride>;
   7485   def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
   7486                                 MEMri, WithStride>;
   7487   def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
   7488                                 MEMri64, WithStride>;
   7489 }
   7490 
        // Pairs the intrinsic matcher with the instructions that use it: defines the
        // "_Intr" PatFrag first, then the per-operand-kind instruction variants,
        // which resolve that PatFrag by name.
   7491 multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
   7492                              string Space, string Type, NVPTXRegClass regclass,
   7493                              bit WithStride> {
   7494   // Define a PatFrag that matches appropriate intrinsic that loads from the
   7495   // given address space.
   7496   def _Intr:  WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
   7497                                     WithStride>;
   7498   defm NAME:  WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
   7499                                WithStride>;
   7500 }
   7501 
        // Emits both the strided form (record names gain "_stride") and the
        // unstrided form of a load.
   7502 multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
   7503                            string Space, string Type, NVPTXRegClass regclass> {
   7504   defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
   7505   defm NAME:    WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
   7506 }
   7507 
        // Emits the ".global", ".shared" and unqualified ("") address-space forms
        // of a load; the unqualified form adds no suffix to the record name.
   7508 multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
   7509                           string Type, NVPTXRegClass regclass> {
   7510   defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
   7511                                 Type, regclass>;
   7512   defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
   7513                                 Type, regclass>;
   7514   defm NAME:    WMMA_LOAD_GALST<Geometry, Abc, Layout,        "",
   7515                                 Type, regclass>;
   7516 }
   7517 
        // Emits the "row" and "col" layout forms of a load.
   7518 multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
   7519                          string Type, NVPTXRegClass regclass> {
   7520   defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
   7521   defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
   7522 }
   7523 
        // All load variants for one geometry: fragments a and b as f16 in
        // Float16x2Regs, and fragment c as either f16 (Float16x2Regs) or f32
        // (Float32Regs).
   7524 multiclass WMMA_LOAD_G<string Geometry> {
   7525   defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
   7526   defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
   7527   defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
   7528   defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
   7529 }
   7530 
        // Instantiate the load instructions for each of the three geometries.
   7531 defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
   7532 defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
   7533 defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
   7534 
   7535 //
   7536 // wmma.store.d.sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
   7537 //
        // One wmma.store.d instruction variant; it has no results.
        //   Geometry   - shape string. "m16n16k16" requires hasPTX60, any other value
        //                requires hasPTX61; hasSM70 is always required.
        //   Layout     - "row" or "col".
        //   Space      - "", ".global" or ".shared"; appended verbatim to the opcode.
        //   Type       - "f16" stores four registers; any other value stores eight.
        //   regclass   - register class of the stored fragment registers.
        //   WithStride - when set, adds a trailing Int32Regs:$ldm operand.
        //   DstOp      - operand kind used for the destination address.
        // NOTE(review): the destination address operand is named $src here, while
        // WMMA_STORE_INTR_HELPER names the same position $dst. It appears harmless,
        // but the names are worth unifying.
   7538 class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
   7539                           string Type, NVPTXRegClass regclass,
   7540                           bit WithStride, DAGOperand DstOp>
   7541   : EmptyNVPTXInst,
   7542     Requires<[!if(!eq(Geometry, "m16n16k16"),
   7543                   hasPTX60,
   7544                   hasPTX61),
   7545               hasSM70]> {
          // PatFrag created by WMMA_STORE_INTR_HELPER, looked up by record name.
   7546   PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
   7547                                        # "_" # Geometry # "_store_d"
   7548                                        # "_" # Type
   7549                                        # "_" # Layout
   7550                                        # !subst(".", "_", Space)
   7551                                        # !if(WithStride,"_stride", "")
   7552                                        # "_Intr");
   7553   dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
   7554                                 regclass:$r2, regclass:$r3);
   7555   dag InsR47 = (ins regclass:$r4, regclass:$r5,
   7556                     regclass:$r6, regclass:$r7);
   7557   dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
   7558   dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
   7559   dag Ins = !con(InsR, StrideArg);
   7560 
   7561   // Construct the pattern to match corresponding intrinsic call. See the
   7562   // details in the comments in WMMA_LOAD_GALSTOS.
   7563   dag PatArgs = !foreach(tmp, Ins,
   7564                               !subst(imem, ADDRvar,
   7565                               !subst(MEMri64, ADDRri64,
   7566                               !subst(MEMri, ADDRri,
   7567                               !subst(ins, IntrMatcher, tmp)))));
   7568   let Pattern = [PatArgs];
   7569   let OutOperandList = (outs);
   7570   let InOperandList = Ins;
   7571   let AsmString = "wmma.store.d.sync."
   7572                   # Layout
   7573                   # "." # Geometry
   7574                   # Space
   7575                   # "." # Type
   7576                   # " \t[$src],"
   7577                   # !if(!eq(Type,"f16"),
   7578                         "{{$r0, $r1, $r2, $r3}}",
   7579                         "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
   7580                   # !if(WithStride, ", $ldm", "")
   7581                   # ";";
   7582 
   7583 }
   7584 
        // PatFrag that matches one wmma.store.d intrinsic restricted to one address
        // space. WMMA_STORE_D_GLSTSh instantiates it with the "_Intr" suffix, which
        // is the name WMMA_STORE_D_GLSTSO looks up with !cast.
        //   Type       - "f16" matches four fragment operands; any other value
        //                matches eight.
        //   Space      - ".shared" and ".global" select the shared/global check;
        //                any other value selects the generic check.
        //   WithStride - adds the $ldm operand and the "_stride" intrinsic suffix.
   7585 class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
   7586                              string Type, bit WithStride>
   7587                             : PatFrag <(ops),(ops)> {
   7588   // Intrinsic that matches this instruction.
   7589   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
   7590                                     # Geometry
   7591                                     # "_store_d"
   7592                                     # "_" # Type
   7593                                     # "_" # Layout
   7594                                     # !if(WithStride, "_stride", ""));
          // Candidate predicate bodies; PredicateCode below picks one by Space.
   7595   code match_generic = [{
   7596    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
   7597   }];
   7598   code match_shared = [{
   7599    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
   7600   }];
   7601   code match_global = [{
   7602    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
   7603   }];
   7604 
   7605   dag Args = !if(!eq(Type,"f16"),
   7606                  (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
   7607                  (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
   7608                                  node:$r4, node:$r5, node:$r6, node:$r7));
   7609   dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
          // The fragment is the operand list with 'ops' replaced by the intrinsic.
   7610   let Operands = !con(Args, StrideArg);
   7611   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
   7612   let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
   7613                       !if(!eq(Space, ".global"), match_global, match_generic));
   7614 }
   7615 
        // Instantiates WMMA_STORE_D_GLSTSO once per destination-address operand
        // kind: imem (_avar), Int32Regs (_areg), Int64Regs (_areg64), MEMri (_ari)
        // and MEMri64 (_ari64). All other parameters are passed through unchanged.
   7616 multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
   7617                               string Type, NVPTXRegClass regclass,
   7618                               bit WithStride> {
   7619   def _avar:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
   7620                                    WithStride, imem>;
   7621   def _areg:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
   7622                                    WithStride, Int32Regs>;
   7623   def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
   7624                                    WithStride, Int64Regs>;
   7625   def _ari:    WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
   7626                                    WithStride, MEMri>;
   7627   def _ari64:  WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
   7628                                    WithStride, MEMri64>;
   7629 }
   7630 
        // Pairs the intrinsic matcher with the instructions that use it: defines the
        // "_Intr" PatFrag first, then the per-operand-kind instruction variants,
        // which resolve that PatFrag by name.
   7631 multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
   7632                                string Type, NVPTXRegClass regclass,
   7633                                bit WithStride> {
   7634   // Define a PatFrag that matches appropriate intrinsic that stores to the
   7635   // given address space.
   7636   def _Intr:    WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
   7637                                        WithStride>;
   7638   defm NAME:    WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
   7639                                    WithStride>;
   7640 }
   7641 
        // Emits both the strided form (record names gain "_stride") and the
        // unstrided form of a store.
   7642 multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
   7643                              string Type, NVPTXRegClass regclass > {
   7644   defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
   7645   defm NAME:    WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
   7646 }
   7647 
        // Emits the ".global", ".shared" and unqualified ("") address-space forms
        // of a store; the unqualified form adds no suffix to the record name.
   7648 multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
   7649                            string Type, NVPTXRegClass regclass> {
   7650   defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
   7651   defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
   7652   defm NAME:    WMMA_STORE_D_GLST<Geometry, Layout,        "", Type, regclass>;
   7653 }
   7654 
        // Emits the "row" and "col" layout forms of a store.
   7655 multiclass WMMA_STORE_D_GT<string Geometry, string Type,
   7656                            NVPTXRegClass regclass> {
   7657   defm _row:    WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
   7658   defm _col:    WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
   7659 }
   7660 
        // All store variants for one geometry: the d fragment as f16
        // (Float16x2Regs) or as f32 (Float32Regs).
   7661 multiclass WMMA_STORE_D_G<string Geometry> {
   7662   defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
   7663   defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
   7664 }
   7665 
        // Instantiate the store instructions for each of the three geometries.
   7666 defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
   7667 defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
   7668 defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
   7669 
   7670 // WMMA.MMA
        // One wmma.mma.sync instruction variant.
        //   Geometry  - shape string. "m16n16k16" requires hasPTX60, any other value
        //               requires hasPTX61; hasSM70 is always required.
        //   ALayout, BLayout - layout strings for the a and b operands.
        //   DType, d_reg - result element type and register class; "f16" yields
        //               four result registers, any other value yields eight.
        //   CType, c_reg - accumulator element type and register class; "f16"
        //               takes four c registers, any other value takes eight.
        //   ab_reg    - register class of the eight a and eight b registers.
        //   Satfinite - "" or a modifier such as ".satfinite"; appended verbatim to
        //               the opcode and, with '.' replaced by '_', to the intrinsic
        //               name.
   7671 class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
   7672                      string DType, NVPTXRegClass d_reg,
   7673                      string CType, NVPTXRegClass c_reg,
   7674                      NVPTXRegClass ab_reg,
   7675                      string Satfinite = "">
   7676   : EmptyNVPTXInst,
   7677     Requires<[!if(!eq(Geometry, "m16n16k16"),
   7678                   hasPTX60,
   7679                   hasPTX61),
   7680               hasSM70]> {
          // Intrinsic this instruction is selected for, looked up by name.
   7681   Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
   7682                                     # Geometry
   7683                                     # "_mma"
   7684                                     # "_" # ALayout
   7685                                     # "_" # BLayout
   7686                                     # "_" # DType
   7687                                     # "_" # CType
   7688                                     # !subst(".", "_", Satfinite));
   7689   dag Outs = !if(!eq(DType,"f16"),
   7690                  (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
   7691                  (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
   7692                        d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
   7693   dag InsExtraCArgs = !if(!eq(CType,"f16"),
   7694                           (ins),
   7695                           (ins c_reg:$c4,  c_reg:$c5,  c_reg:$c6,  c_reg:$c7));
   7696   dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
   7697                       ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
   7698                       ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
   7699                       ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
   7700                       c_reg:$c0,  c_reg:$c1,  c_reg:$c2,  c_reg:$c3),
   7701                   InsExtraCArgs);
   7702 
   7703   // Construct the pattern to match corresponding intrinsic call. See the
   7704   // details in the comments in WMMA_LOAD_GALSTOS.
   7705   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
   7706   dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
   7707   let Pattern = [!con(PatOuts, (set PatArgs))];
   7708   let OutOperandList = Outs;
   7709   let InOperandList  = Ins;
          // NOTE(review): the f16 result line ends in ", \n" (with a space) while the
          // f32 one ends in ",\n"; only whitespace in the emitted text differs.
   7710   let AsmString = "wmma.mma.sync."
   7711                   # ALayout
   7712                   # "." # BLayout
   7713                   # "." # Geometry
   7714                   # "." # DType
   7715                   # "." # CType
   7716                   # Satfinite # "\n\t\t"
   7717                   # !if(!eq(DType,"f16"),
   7718                         "{{$d0, $d1, $d2, $d3}}, \n\t\t",
   7719                         "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
   7720                   # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
   7721                   # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
   7722                   # !if(!eq(CType,"f16"),
   7723                         "{{$c0, $c1, $c2, $c3}};",
   7724                         "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
   7725 }
   7726 
        // Emits the ".satfinite" form (record names gain "_satfinite") and the
        // plain form of one mma variant. The a and b operands always use
        // Float16x2Regs.
   7727 multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
   7728                          string DType, NVPTXRegClass d_reg,
   7729                          string CType, NVPTXRegClass c_reg> {
   7730   def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
   7731                                  DType, d_reg, CType, c_reg,
   7732                                  Float16x2Regs, ".satfinite">;
   7733   def NAME:       WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
   7734                                  DType, d_reg, CType, c_reg,
   7735                                  Float16x2Regs>;
   7736 }
   7737 
        // Emits both accumulator (c) types for a fixed result type: f16 in
        // Float16x2Regs and f32 in Float32Regs.
   7738 multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
   7739                         string DType, NVPTXRegClass d_reg> {
   7740   defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
   7741                             "f16", Float16x2Regs>;
   7742   defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
   7743                             "f32", Float32Regs>;
   7744 }
   7745 
        // Emits both result (d) types: f16 in Float16x2Regs and f32 in Float32Regs.
   7746 multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
   7747   defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
   7748   defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
   7749 }
   7750 
        // Emits both b layouts ("col" and "row") for a fixed a layout.
   7751 multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
   7752   defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
   7753   defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
   7754 }
   7755 
        // All mma variants for one geometry, starting with both a layouts. Record
        // names end in _<ALayout>_<BLayout>_<DType>_<CType>[_satfinite].
   7756 multiclass WMMA_MMA_G<string Geometry> {
   7757   defm _col: WMMA_MMA_GA<Geometry, "col">;
   7758   defm _row: WMMA_MMA_GA<Geometry, "row">;
   7759 }
   7760 
        // Instantiate the mma instructions for each of the three geometries.
   7761 defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
   7762 defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
   7763 defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;
   7764