Home | History | Annotate | Download | only in NVPTX
      1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     // Floating-point immediate leaves. Each PatLeaf matches an fpimm node only
     // when its C++ predicate returns true. Patterns later in this file use them
     // as the 0.0 / 1.0 bounds of nested fmin/fmax calls.
     // immFloat0: the constant, read with convertToFloat(), compares equal to 0.0f.
     10 def immFloat0 : PatLeaf<(fpimm), [{
     11     float f = (float)N->getValueAPF().convertToFloat();
     12     return (f==0.0f);
     13 }]>;
     14 
     // immFloat1: the constant, read with convertToFloat(), compares equal to 1.0f.
     15 def immFloat1 : PatLeaf<(fpimm), [{
     16     float f = (float)N->getValueAPF().convertToFloat();
     17     return (f==1.0f);
     18 }]>;
     19 
     // immDouble0: the constant, read with convertToDouble(), compares equal to 0.0.
     20 def immDouble0 : PatLeaf<(fpimm), [{
     21     double d = (double)N->getValueAPF().convertToDouble();
     22     return (d==0.0);
     23 }]>;
     24 
     // immDouble1: the constant, read with convertToDouble(), compares equal to 1.0.
     25 def immDouble1 : PatLeaf<(fpimm), [{
     26     double d = (double)N->getValueAPF().convertToDouble();
     27     return (d==1.0);
     28 }]>;
     29 
     30 
     31 
     32 //-----------------------------------
     33 // Synchronization Functions
     34 //-----------------------------------
        // Barrier instructions; every asm string below names barrier number 0.
        // INT_CUDA_SYNCTHREADS and INT_BARRIER0 take no operands and share the
        // same asm text ("bar.sync 0;"); they differ only in the intrinsic that
        // selects them.
     35 def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins),
     36                   "bar.sync \t0;",
     37       [(int_cuda_syncthreads)]>;
     38 def INT_BARRIER0 : NVPTXInst<(outs), (ins),
     39                   "bar.sync \t0;",
     40       [(int_nvvm_barrier0)]>;
        // Reduction barriers: 32-bit $pred in, 32-bit $dst out. Each asm string is
        // a brace-delimited block that declares local predicate register(s), sets
        // %p1 = ($pred != 0) with setp.ne.u32, and then issues bar.red.
        // POPC form: bar.red.popc.u32 writes its result directly to $dst.
     41 def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
     42   !strconcat("{{ \n\t",
     43       !strconcat(".reg .pred \t%p1; \n\t",
     44       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
     45       !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
     46         !strconcat("}}", ""))))),
     47       [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
        // AND form: bar.red.and.pred produces predicate %p2, which selp.u32 turns
        // into a 32-bit 1 or 0 in $dst.
     48 def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
     49   !strconcat("{{ \n\t",
     50       !strconcat(".reg .pred \t%p1; \n\t",
     51       !strconcat(".reg .pred \t%p2; \n\t",
     52       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
     53       !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
     54       !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
     55         !strconcat("}}", ""))))))),
     56       [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
        // OR form: identical to the AND form except for bar.red.or.pred.
     57 def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
     58   !strconcat("{{ \n\t",
     59       !strconcat(".reg .pred \t%p1; \n\t",
     60       !strconcat(".reg .pred \t%p2; \n\t",
     61       !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
     62       !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
     63       !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
     64         !strconcat("}}", ""))))))),
     65       [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
     66 
     67 
     68 //-----------------------------------
     69 // Explicit Memory Fence Functions
     70 //-----------------------------------
        // MEMBAR describes an instruction with no outputs and no inputs. StrOp is
        // the complete asm text and IntOP is the operand-less intrinsic that
        // selects it.
     71 class MEMBAR<string StrOp, Intrinsic IntOP> :
     72               NVPTXInst<(outs), (ins),
     73             StrOp, [(IntOP)]>;
     74 
        // One fence per suffix used in the asm text: .cta, .gl and .sys.
     75 def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
     76 def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
     77 def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
     78 
     79 
     80 //-----------------------------------
     81 // Math Functions
     82 //-----------------------------------
     83 
     84 // Map min(1.0, max(0.0, x)) to sat(x).
     85 // Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is
     86 // NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
     87 // The same holds for fmax and fmin.
     88 // All four operand orders of the nested fmin/fmax pair are matched below.
     89 
        // f32: fmin(1.0, fmax(0.0, a)) in each operand order -> CVT_f32_f32 with
        // the CvtSAT mode.
     90 def : Pat<(int_nvvm_fmin_f immFloat1,
     91             (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
     92           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
     93 def : Pat<(int_nvvm_fmin_f immFloat1,
     94             (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
     95           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
     96 def : Pat<(int_nvvm_fmin_f
     97             (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
     98           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
     99 def : Pat<(int_nvvm_fmin_f
    100             (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
    101           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    102 
        // f64: the same four shapes -> CVT_f64_f64 with the CvtSAT mode.
    103 def : Pat<(int_nvvm_fmin_d immDouble1,
    104             (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
    105           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    106 def : Pat<(int_nvvm_fmin_d immDouble1,
    107             (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
    108           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    109 def : Pat<(int_nvvm_fmin_d
    110             (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
    111           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    112 def : Pat<(int_nvvm_fmin_d
    113             (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
    114           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    115 
    116 
    117 // We need a full string for OpcStr here because we need to deal with cases
    118 // like INT_PTX_RECIP.
        // F_MATH_1: one-source instruction. OpcStr is the complete asm text
        // (operands included); the pattern sets $dst (target_regclass) to IntOP
        // applied to $src0 (src_regclass).
    119 class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
    120   NVPTXRegClass src_regclass, Intrinsic IntOP>
    121             : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
    122             OpcStr,
    123         [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
    124 
    125 // We need a full string for OpcStr here because we need to deal with cases
    126 // like INT_PTX_NATIVE_POWR_F.
        // F_MATH_2: two-source instruction. OpcStr is the complete asm text; the
        // pattern sets $dst (t_regclass) to IntOP applied to $src0 and $src1,
        // each of which has its own register class.
    127 class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
    128   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
    129             : NVPTXInst<(outs t_regclass:$dst),
    130               (ins s0_regclass:$src0, s1_regclass:$src1),
    131             OpcStr,
    132         [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
    133 
    // F_MATH_3: three-source instruction. OpcStr is the complete asm text; the
    // pattern sets $dst (t_regclass) to IntOP applied to $src0, $src1 and $src2,
    // each of which has its own register class.
    134 class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
    135   NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
    136   NVPTXRegClass s2_regclass, Intrinsic IntOP>
    137             : NVPTXInst<(outs t_regclass:$dst),
    138               (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
    139             OpcStr,
    140         [(set t_regclass:$dst,
    141           (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
    142 
    143 //
    144 // MISC
    145 //
        // clz, popc and prmt instructions. The 64-bit clz/popc forms read an
        // Int64Regs source but still write an Int32Regs result.
    146 
    147 def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
    148   int_nvvm_clz_i>;
    149 def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
    150   int_nvvm_clz_ll>;
    151 
    152 def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
    153   int_nvvm_popc_i>;
    154 def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
    155   int_nvvm_popc_ll>;
    156 
        // prmt.b32 takes three 32-bit sources.
    157 def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
    158   Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
    159 
    160 //
    161 // Min Max
    162 //
        // Two-operand min/max. Integer forms come in .s32/.u32/.s64/.u64; the
        // _I/_UI/_LL/_ULL suffix of each def matches the intrinsic it selects.
    163 
    164 def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
    165   Int32Regs, Int32Regs, int_nvvm_min_i>;
    166 def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
    167   Int32Regs, Int32Regs, int_nvvm_min_ui>;
    168 
    169 def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
    170   Int64Regs, Int64Regs, int_nvvm_min_ll>;
    171 def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
    172   Int64Regs, Int64Regs, int_nvvm_min_ull>;
    173 
    174 def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
    175   Int32Regs, Int32Regs, int_nvvm_max_i>;
    176 def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
    177   Int32Regs, Int32Regs, int_nvvm_max_ui>;
    178 
    179 def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
    180   Int64Regs, Int64Regs, int_nvvm_max_ll>;
    181 def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
    182   Int64Regs, Int64Regs, int_nvvm_max_ull>;
    183 
        // Floating-point forms: f32 with and without .ftz, and f64 (no .ftz form
        // is defined here for f64).
    184 def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
    185   Float32Regs, Float32Regs, int_nvvm_fmin_f>;
    186 def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
    187   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
    188 
    189 def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
    190   Float32Regs, Float32Regs, int_nvvm_fmax_f>;
    191 def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
    192   Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
    193 
    194 def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
    195   Float64Regs, Float64Regs, int_nvvm_fmin_d>;
    196 def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
    197   Float64Regs, Float64Regs, int_nvvm_fmax_d>;
    198 
    199 //
    200 // Multiplication
    201 //
        // Three groups follow: mul.hi for signed/unsigned 32- and 64-bit integers,
        // floating-point mul with an explicit rounding suffix (rn, rz, rm, rp),
        // and mul24.lo for signed/unsigned 32-bit integers.
    202 
    203 def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
    204   Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
    205 def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
    206   Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
    207 
    208 def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
    209   Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
    210 def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
    211   Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
    212 
        // f32: each rounding suffix has an .ftz and a non-.ftz form.
    213 def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    214   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
    215 def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
    216   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
    217 def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    218   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
    219 def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
    220   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
    221 def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    222   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
    223 def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
    224   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
    225 def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    226   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
    227 def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
    228   Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
    229 
        // f64: one form per rounding suffix, no .ftz variants.
    230 def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
    231   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
    232 def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
    233   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
    234 def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
    235   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
    236 def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
    237   Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
    238 
    239 def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
    240   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
    241 def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
    242   Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
    243 
    244 //
    245 // Div
    246 //
        // Floating-point division. f32 has div.approx plus the four rounding
        // suffixes (rn, rz, rm, rp), each with and without .ftz; f64 has only the
        // four rounding suffixes.
    247 
    248 def INT_NVVM_DIV_APPROX_FTZ_F
    249   : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
    250     Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
    251 def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
    252   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
    253 
    254 def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
    255   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
    256 def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
    257   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
    258 def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
    259   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
    260 def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
    261   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
    262 def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
    263   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
    264 def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
    265   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
    266 def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
    267   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
    268 def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
    269   Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
    270 
    271 def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
    272   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
    273 def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
    274   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
    275 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
    276   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
    277 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
    278   Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
    279 
    280 //
    281 // Brev
    282 //
        // brev.b32 / brev.b64: source and result use the same register width.
    283 
    284 def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
    285   int_nvvm_brev32>;
    286 def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
    287   int_nvvm_brev64>;
    288 
    289 //
    290 // Sad
    291 //
        // sad.s32 / sad.u32: three 32-bit sources, one 32-bit result.
    292 
    293 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
    294   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
    295 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
    296   Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
    297 
    298 //
    299 // Floor  Ceil
    300 //
        // No new instructions are defined here. floor selects a same-type CVT
        // with the CvtRMI mode and ceil one with the CvtRPI mode; the _ftz_
        // intrinsics use the corresponding *_FTZ mode. The CVT_* instructions and
        // Cvt* mode operands are defined outside this section.
    301 
    302 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
    303           (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    304 def : Pat<(int_nvvm_floor_f Float32Regs:$a),
    305           (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
    306 def : Pat<(int_nvvm_floor_d Float64Regs:$a),
    307           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
    308 
    309 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
    310           (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    311 def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
    312           (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
    313 def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
    314           (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
    315 
    316 //
    317 // Abs
    318 //
        // Absolute value: signed 32/64-bit integer forms, f32 with and without
        // .ftz, and f64.
    319 
    320 def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
    321   int_nvvm_abs_i>;
    322 def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
    323   int_nvvm_abs_ll>;
    324 
    325 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
    326   Float32Regs, int_nvvm_fabs_ftz_f>;
    327 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
    328   Float32Regs, int_nvvm_fabs_f>;
    329 
    330 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
    331   Float64Regs, int_nvvm_fabs_d>;
    332 
    333 //
    334 // Round
    335 //
        // round selects a same-type CVT with the CvtRNI mode (CvtRNI_FTZ for the
        // _ftz_ intrinsic).
        // NOTE(review): CvtRNI presumably rounds ties to even, which may differ
        // from C round() on exact .5 inputs - confirm against the intrinsic's
        // intended semantics.
    336 
    337 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
    338           (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    339 def : Pat<(int_nvvm_round_f Float32Regs:$a),
    340           (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
    341 def : Pat<(int_nvvm_round_d Float64Regs:$a),
    342           (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
    343 
    344 //
    345 // Trunc
    346 //
        // trunc selects a same-type CVT with the CvtRZI mode (CvtRZI_FTZ for the
        // _ftz_ intrinsic).
    347 
    348 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
    349           (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    350 def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
    351           (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
    352 def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
    353           (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
    354 
    355 //
    356 // Saturate
    357 //
        // The explicit saturate intrinsics select a same-type CVT with the CvtSAT
        // mode (CvtSAT_FTZ for the _ftz_ intrinsic). This is the same CvtSAT
        // target used by the fmin/fmax clamp patterns earlier in the file.
    358 
    359 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
    360           (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
    361 def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
    362           (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
    363 def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
    364           (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
    365 
    366 //
    367 // Exp2  Log2
    368 //
        // ex2.approx and lg2.approx: f32 with and without .ftz, plus an f64 form
        // of each.
    369 
    370 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
    371   Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
    372 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
    373   Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
    374 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
    375   Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
    376 
    377 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
    378   Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
    379 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
    380   Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
    381 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
    382   Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
    383 
    384 //
    385 // Sin  Cos
    386 //
        // sin.approx and cos.approx: f32 only, each with and without .ftz.
    387 
    388 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
    389   Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
    390 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
    391   Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
    392 
    393 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
    394   Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
    395 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
    396   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
    397 
    398 //
    399 // Fma
    400 //
        // Three-source fma with an explicit rounding suffix (rn, rz, rm, rp).
        // f32 has an .ftz and a non-.ftz form per suffix; f64 has one form per
        // suffix.
    401 
    402 def INT_NVVM_FMA_RN_FTZ_F
    403   : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    404     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
    405 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
    406   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
    407 def INT_NVVM_FMA_RZ_FTZ_F
    408   : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    409     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
    410 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
    411   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
    412 def INT_NVVM_FMA_RM_FTZ_F
    413   : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    414     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
    415 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
    416   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
    417 def INT_NVVM_FMA_RP_FTZ_F
    418   : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    419     Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
    420 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
    421   Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
    422 
    423 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
    424   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
    425 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
    426   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
    427 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
    428   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
    429 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
    430   Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
    431 
    432 //
    433 // Rcp
    434 //
        // Reciprocal with an explicit rounding suffix (rn, rz, rm, rp): f32 with
        // and without .ftz, f64 without. The only approx form defined here is the
        // f64 rcp.approx.ftz at the end of the section.
    435 
    436 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
    437   Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
    438 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
    439   Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
    440 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
    441   Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
    442 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
    443   Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
    444 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
    445   Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
    446 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
    447   Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
    448 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
    449   Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
    450 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
    451   Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
    452 
    453 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
    454   Float64Regs, int_nvvm_rcp_rn_d>;
    455 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
    456   Float64Regs, int_nvvm_rcp_rz_d>;
    457 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
    458   Float64Regs, int_nvvm_rcp_rm_d>;
    459 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
    460   Float64Regs, int_nvvm_rcp_rp_d>;
    461 
    462 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
    463   Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
    464 
    465 //
    466 // Sqrt
    467 //
        // Square root. f32 has the four rounding suffixes (rn, rz, rm, rp) and
        // sqrt.approx, each with and without .ftz; f64 has only the four rounding
        // suffixes. Each def is selected by the intrinsic of the same name.
    468 
    469 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
    470   Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
    471 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
    472   Float32Regs, int_nvvm_sqrt_rn_f>;
    473 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
    474   Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
    475 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
    476   Float32Regs, int_nvvm_sqrt_rz_f>;
    477 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
    478   Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
    479 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
    480   Float32Regs, int_nvvm_sqrt_rm_f>;
    481 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
    482   Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
    483 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
    484   Float32Regs, int_nvvm_sqrt_rp_f>;
    485 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
    486   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
    487 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
    488   Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
    489 
    490 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
    491   Float64Regs, int_nvvm_sqrt_rn_d>;
    492 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
    493   Float64Regs, int_nvvm_sqrt_rz_d>;
    494 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
    495   Float64Regs, int_nvvm_sqrt_rm_d>;
    496 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
    497   Float64Regs, int_nvvm_sqrt_rp_d>;
    498 
    499 // nvvm_sqrt intrinsic
        // The generic int_nvvm_sqrt_f intrinsic has no instruction of its own; it
        // is mapped onto one of four f32 sqrt instructions according to the
        // predicates listed in Requires:
        //   doF32FTZ and do_SQRTF32_RN -> sqrt.rn.ftz.f32
        //   do_SQRTF32_RN              -> sqrt.rn.f32
        //   doF32FTZ                   -> sqrt.approx.ftz.f32
        //   (no predicate)             -> sqrt.approx.f32
        // NOTE(review): more than one pattern can be eligible at once (the last
        // is unconditional); selection presumably relies on pattern order or
        // priority - confirm before reordering these.
    500 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    501           (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
    502 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    503           (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
    504 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    505           (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
    506 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
    507           (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
    508 
    509 //
    510 // Rsqrt
    511 //
        // rsqrt.approx only: f32 with and without .ftz, and f64.
    512 
    513 def INT_NVVM_RSQRT_APPROX_FTZ_F
    514   : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    515     int_nvvm_rsqrt_approx_ftz_f>;
    516 def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
    517   Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
    518 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
    519   Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
    520 
    521 //
    522 // Add
    523 //
        // Floating-point add with an explicit rounding suffix (rn, rz, rm, rp):
        // f32 with and without .ftz, f64 without.
    524 
    525 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
    526   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
    527 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
    528   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
    529 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
    530   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
    531 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
    532   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
    533 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
    534   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
    535 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
    536   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
    537 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
    538   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
    539 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
    540   Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
    541 
    542 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
    543   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
    544 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
    545   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
    546 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
    547   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
    548 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
    549   Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
    550 
    551 //
    552 // Convert
    553 //
    554 
    555 def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
    556           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
    557 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
    558           (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
    559 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
    560           (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
    561 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
    562           (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
    563 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
    564           (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
    565 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
    566           (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
    567 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
    568           (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
    569 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
    570           (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
    571 
    572 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
    573           (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
    574 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
    575           (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
    576 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
    577           (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
    578 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
    579           (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
    580 
    581 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
    582           (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
    583 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
    584           (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
    585 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
    586           (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
    587 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
    588           (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
    589 
    590 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
    591           (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
    592 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
    593           (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
    594 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
    595           (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
    596 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
    597           (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
    598 
    599 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
    600           (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
    601 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
    602           (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
    603 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
    604           (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
    605 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
    606           (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
    607 
    608 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
    609           (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    610 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
    611           (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
    612 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
    613           (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    614 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
    615           (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
    616 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
    617           (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    618 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
    619           (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
    620 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
    621           (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    622 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
    623           (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
    624 
    // Select the int_nvvm_f2ui_* intrinsics (Float32Regs operand) to
    // CVT_u32_f32, using the Cvt??I / Cvt??I_FTZ mode operand named by the
    // intrinsic suffix.
    625 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
    626           (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    627 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
    628           (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
    629 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
    630           (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    631 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
    632           (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
    633 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
    634           (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    635 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
    636           (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
    637 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
    638           (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    639 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
    640           (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
    641 
    // Select the int_nvvm_i2f_{rn,rz,rm,rp} intrinsics on an Int32Regs operand
    // to CVT_f32_s32 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    642 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
    643           (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
    644 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
    645           (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
    646 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
    647           (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
    648 def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
    649           (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
    650 
    // Select the int_nvvm_ui2f_{rn,rz,rm,rp} intrinsics on an Int32Regs operand
    // to CVT_f32_u32 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    651 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
    652           (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
    653 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
    654           (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
    655 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
    656           (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
    657 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
    658           (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
    659 
    // int_nvvm_lohi_i2d: packs two Int32Regs sources into one Float64Regs result
    // with a single mov.b64 from the vector operand {$src0, $src1}.  The doubled
    // braces emit literal '{' '}' in the printed PTX.
    660 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
    661   Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
    662 
    // int_nvvm_d2i_lo: inside a braced PTX scope, declare a .b32 scratch register
    // %temp and unpack the Float64Regs source with mov.b64 into {$dst, %temp};
    // $dst receives the first element of the pair and %temp is discarded.
    663 def INT_NVVM_D2I_LO : F_MATH_1<
    664   !strconcat("{{\n\t",
    665              ".reg .b32 %temp; \n\t",
    666              "mov.b64 \t{$dst, %temp}, $src0;\n\t", "}}"),
    667   Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
    // int_nvvm_d2i_hi: inside a braced PTX scope, declare a .b32 scratch register
    // %temp and unpack the Float64Regs source with mov.b64 into {%temp, $dst};
    // $dst receives the second element of the pair and %temp is discarded.
    668 def INT_NVVM_D2I_HI : F_MATH_1<
    669   !strconcat("{{\n\t",
    670              ".reg .b32 %temp; \n\t",
    671              "mov.b64 \t{%temp, $dst}, $src0;\n\t", "}}"),
    672   Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
    673 
    // Select the int_nvvm_f2ll_* intrinsics (Float32Regs operand) to
    // CVT_s64_f32, using the Cvt??I / Cvt??I_FTZ mode operand named by the
    // intrinsic suffix.
    674 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
    675           (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    676 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
    677           (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
    678 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
    679           (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    680 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
    681           (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
    682 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
    683           (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    684 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
    685           (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
    686 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
    687           (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    688 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
    689           (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
    690 
    // Select the int_nvvm_f2ull_* intrinsics (Float32Regs operand) to
    // CVT_u64_f32, using the Cvt??I / Cvt??I_FTZ mode operand named by the
    // intrinsic suffix.
    691 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
    692           (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
    693 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
    694           (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
    695 def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
    696           (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
    697 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
    698           (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
    699 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
    700           (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
    701 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
    702           (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
    703 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
    704           (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
    705 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
    706           (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
    707 
    // Select the int_nvvm_d2ll_{rn,rz,rm,rp} intrinsics (Float64Regs operand) to
    // CVT_s64_f64 with the CvtRNI/RZI/RMI/RPI mode operand named by the suffix.
    708 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
    709           (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
    710 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
    711           (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
    712 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
    713           (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
    714 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
    715           (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
    716 
    // Select the int_nvvm_d2ull_{rn,rz,rm,rp} intrinsics (Float64Regs operand)
    // to CVT_u64_f64 with the CvtRNI/RZI/RMI/RPI mode operand named by the
    // suffix.
    717 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
    718           (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
    719 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
    720           (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
    721 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
    722           (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
    723 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
    724           (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
    725 
    // Select the int_nvvm_ll2f_{rn,rz,rm,rp} intrinsics on an Int64Regs operand
    // to CVT_f32_s64 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    726 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
    727           (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
    728 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
    729           (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
    730 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
    731           (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
    732 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
    733           (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
    734 
    // Select the int_nvvm_ull2f_{rn,rz,rm,rp} intrinsics on an Int64Regs operand
    // to CVT_f32_u64 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    735 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
    736           (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
    737 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
    738           (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
    739 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
    740           (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
    741 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
    742           (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
    743 
    // Select the int_nvvm_ll2d_{rn,rz,rm,rp} intrinsics on an Int64Regs operand
    // to CVT_f64_s64 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    744 def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
    745           (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
    746 def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
    747           (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
    748 def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
    749           (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
    750 def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
    751           (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
    752 
    // Select the int_nvvm_ull2d_{rn,rz,rm,rp} intrinsics on an Int64Regs operand
    // to CVT_f64_u64 with the CvtRN/RZ/RM/RP mode operand named by the suffix.
    753 def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
    754           (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
    755 def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
    756           (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
    757 def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
    758           (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
    759 def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
    760           (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
    761 
    762 
    763 // FIXME: Ideally, we could use these patterns instead of the scope-creating
    764 // patterns, but ptxas does not like these since .s16 is not compatible with
    765 // .f16.  The solution is to use .bXX for all integer register types, but we
    766 // are not there yet.
    767 //def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
    768 //          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
    769 //def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
    770 //          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
    771 //
    772 //def : Pat<(int_nvvm_h2f Int16Regs:$a),
    773 //          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
    774 
    // int_nvvm_f2h_rn_ftz: Float32Regs -> Int16Regs.  The conversion is done in
    // a braced PTX scope through a .b16 scratch register %temp:
    // cvt.rn.ftz.f16.f32 writes %temp, then mov.b16 copies %temp into $dst.
    775 def INT_NVVM_F2H_RN_FTZ : F_MATH_1<
    776   !strconcat("{{\n\t",
    777              ".reg .b16 %temp;\n\t",
    778              "cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
    779              "mov.b16 \t$dst, %temp;\n", "}}"),
    780   Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
    // int_nvvm_f2h_rn: Float32Regs -> Int16Regs.  Same scoped sequence as the
    // _ftz variant but using cvt.rn.f16.f32: convert into the .b16 scratch
    // register %temp, then mov.b16 it into $dst.
    781 def INT_NVVM_F2H_RN : F_MATH_1<
    782   !strconcat("{{\n\t",
    783              ".reg .b16 %temp;\n\t",
    784              "cvt.rn.f16.f32 \t%temp, $src0;\n\t",
    785              "mov.b16 \t$dst, %temp;\n", "}}"),
    786   Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
    787 
    // int_nvvm_h2f: Int16Regs -> Float32Regs.  In a braced PTX scope the source
    // is first copied with mov.b16 into a .b16 scratch register %temp, which is
    // then converted into $dst by cvt.f32.f16.
    788 def INT_NVVM_H2F : F_MATH_1<
    789   !strconcat("{{\n\t",
    790              ".reg .b16 %temp;\n\t",
    791              "mov.b16 \t%temp, $src0;\n\t",
    792              "cvt.f32.f16 \t$dst, %temp;\n\t", "}}"),
    793   Float32Regs, Int16Regs, int_nvvm_h2f>;
    794 
    // Generic f16 <-> f32 conversion nodes.
    //  * f16_to_fp producing f32 selects CVT_f32_f16 with CvtNONE.
    //  * fp_to_f16 from Float32Regs has two patterns for the same DAG: the
    //    CvtRN_FTZ form is guarded by Requires<[doF32FTZ]>, while the CvtRN form
    //    carries no predicate.
    795 def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
    796           (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
    797 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
    798           (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
    799 def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
    800           (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
    801 
    // Generic f16 <-> f64 conversion nodes: f16_to_fp producing f64 selects
    // CVT_f64_f16 with CvtNONE, and fp_to_f16 from Float64Regs selects
    // CVT_f16_f64 with CvtRN.
    802 def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
    803           (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
    804 def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
    805           (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
    806 
    807 //
    808 // Bitcast
    809 //
    810 
    // 32-bit bitcast intrinsics: both directions print the same "mov.b32" and
    // differ only in the result/source register classes (Int32Regs vs.
    // Float32Regs) and the intrinsic they match.
    811 def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
    812   Float32Regs, int_nvvm_bitcast_f2i>;
    813 def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
    814   Int32Regs, int_nvvm_bitcast_i2f>;
    815 
    // 64-bit bitcast intrinsics: both directions print the same "mov.b64" and
    // differ only in the result/source register classes (Int64Regs vs.
    // Float64Regs) and the intrinsic they match.
    816 def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
    817   Int64Regs, int_nvvm_bitcast_ll2d>;
    818 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
    819   Float64Regs, int_nvvm_bitcast_d2ll>;
    820 
    821 //-----------------------------------
    822 // Atomic Functions
    823 //-----------------------------------
    824 
    // PatFrag wrapper over <ops, frag> whose C++ predicate returns the result of
    // ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL) for the matched
    // node N.
    825 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
    826  : PatFrag<ops, frag, [{
    827    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
    828 }]>;
    // PatFrag wrapper over <ops, frag> whose C++ predicate returns the result of
    // ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED) for the matched
    // node N.
    829 class ATOMIC_SHARED_CHK <dag ops, dag frag>
    830  : PatFrag<ops, frag, [{
    831    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
    832 }]>;
    // PatFrag wrapper over <ops, frag> whose C++ predicate returns the result of
    // ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC) for the matched
    // node N.
    833 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
    834  : PatFrag<ops, frag, [{
    835    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
    836 }]>;
    837 
    // Two-operand atomic for one pointer register class.
    //   ptrclass  - register class of the $addr operand
    //   regclass  - register class of $dst and of the register form of $b
    //   SpaceStr / OpcStr / TypeStr - concatenated, in that order, after "atom"
    //               to form the printed mnemonic
    //   IntOp     - PatFrag matched by the selection pattern
    //   IMMType / IMM - operand type and DAG node used by the immediate form
    //   Pred      - predicate required by both defs
    // Produces `reg` ($b in a register) and `imm` ($b as an immediate).
    838 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    839   string SpaceStr, string TypeStr, string OpcStr,
    840   PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
    841   def reg : NVPTXInst<
    842       (outs regclass:$dst),
    843       (ins ptrclass:$addr, regclass:$b),
    844       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    845                  " \t$dst, [$addr], $b;"),
    846       [(set regclass:$dst,
    847             (IntOp ptrclass:$addr, regclass:$b))]>,
    848     Requires<[Pred]>;
    849   def imm : NVPTXInst<
    850       (outs regclass:$dst),
    851       (ins ptrclass:$addr, IMMType:$b),
    852       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    853                  " \t$dst, [$addr], $b;"),
    854       [(set regclass:$dst,
    855             (IntOp ptrclass:$addr, IMM:$b))]>,
    856     Requires<[Pred]>;
    857 }
    // Instantiates F_ATOMIC_2_imp twice with identical arguments except for the
    // pointer register class: `p32` uses Int32Regs and `p64` uses Int64Regs.
    858 multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
    859   string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
    860   defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    861     IntOp, IMMType, IMM, Pred>;
    862   defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    863     IntOp, IMMType, IMM, Pred>;
    864 }
    865 
    866 // Two-operand atomic that negates its second operand before the atom op.
    // TypeStr is a bare bit width here (it is appended to ".s" and ".u").  Only a
    // register form is defined; IMMType is accepted but not referenced in this
    // body.
    867 multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    868   string SpaceStr, string TypeStr, string OpcStr,
    869   PatFrag IntOp, Operand IMMType, Predicate Pred> {
    870   def reg : NVPTXInst<
    871       (outs regclass:$dst),
    872       (ins ptrclass:$addr, regclass:$b),
    873       !strconcat(
    874         // Open a PTX scope and declare a signed scratch register `temp`.
    875         "{{ \n\t",
    876         ".reg \t.s", TypeStr, " temp; \n\t",
    877         // temp = -$b
    878         "neg.s", TypeStr, " \ttemp, $b; \n\t",
    879         // atom<space><op>.u<width> on the negated value, then close the scope.
    880         "atom", SpaceStr, OpcStr,
    881         ".u", TypeStr,
    882         " \t$dst, [$addr], temp; \n\t",
    883         "}}"),
    884       [(set regclass:$dst,
    885             (IntOp ptrclass:$addr, regclass:$b))]>,
    886     Requires<[Pred]>;
    887 }
    // Instantiates F_ATOMIC_2_NEG_imp twice with identical arguments except for
    // the pointer register class: `p32` uses Int32Regs and `p64` uses Int64Regs.
    888 multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
    889   string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
    890   Predicate Pred> {
    891  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    892    IntOp, IMMType, Pred> ;
    893  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    894    IntOp, IMMType, Pred> ;
    895 }
    896 
    897 // Three-operand atomic ($addr, $b, $c) for one pointer register class.
    // All four defs print "atom" + SpaceStr + OpcStr + TypeStr followed by
    // " \t$dst, [$addr], $b, $c;" and differ only in which of $b / $c is an
    // immediate: reg (neither), imm1 ($b), imm2 ($c), imm3 (both).  The
    // immediate patterns match the `imm` node directly.
    898 multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    899   string SpaceStr, string TypeStr, string OpcStr,
    900   PatFrag IntOp, Operand IMMType, Predicate Pred> {
    901   def reg : NVPTXInst<
    902       (outs regclass:$dst),
    903       (ins ptrclass:$addr, regclass:$b, regclass:$c),
    904       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    905                  " \t$dst, [$addr], $b, $c;"),
    906       [(set regclass:$dst,
    907             (IntOp ptrclass:$addr,
    908                    regclass:$b,
    909                    regclass:$c))]>,
    910     Requires<[Pred]>;
    911   def imm1 : NVPTXInst<
    912       (outs regclass:$dst),
    913       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    914       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    915                  " \t$dst, [$addr], $b, $c;"),
    916       [(set regclass:$dst,
    917             (IntOp ptrclass:$addr,
    918                    imm:$b, regclass:$c))]>,
    919     Requires<[Pred]>;
    920   def imm2 : NVPTXInst<
    921       (outs regclass:$dst),
    922       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    923       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    924                  " \t$dst, [$addr], $b, $c;"),
    925       [(set regclass:$dst,
    926             (IntOp ptrclass:$addr,
    927                    regclass:$b, imm:$c))]>,
    928     Requires<[Pred]>;
    929   def imm3 : NVPTXInst<
    930       (outs regclass:$dst),
    931       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    932       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
    933                  " \t$dst, [$addr], $b, $c;"),
    934       [(set regclass:$dst,
    935             (IntOp ptrclass:$addr,
    936                    imm:$b, imm:$c))]>,
    937     Requires<[Pred]>;
    938 }
    // Instantiates F_ATOMIC_3_imp twice with identical arguments except for the
    // pointer register class: `p32` uses Int32Regs and `p64` uses Int64Regs.
    939 multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
    940   string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
    941   defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    942     IntOp, IMMType, Pred>;
    943   defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    944     IntOp, IMMType, Pred>;
    945 }
    946 
    947 // atom_add
    948 
    // Address-space-qualified fragments for atomic add.  Each wraps
    // atomic_load_add_32, atomic_load_add_64 or int_nvvm_atomic_load_add_f32 in
    // the GLOBAL (_g), SHARED (_s) or GENERIC (_gen) address-space check class.
    949 def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    950   (atomic_load_add_32 node:$a, node:$b)>;
    951 def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    952   (atomic_load_add_32 node:$a, node:$b)>;
    953 def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    954   (atomic_load_add_32 node:$a, node:$b)>;
    955 def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    956   (atomic_load_add_64 node:$a, node:$b)>;
    957 def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    958   (atomic_load_add_64 node:$a, node:$b)>;
    959 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    960   (atomic_load_add_64 node:$a, node:$b)>;
    961 def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    962   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
    963 def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    964   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
    965 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    966   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
    967 
    // atom.add instantiations via F_ATOMIC_2 for .u32, .u64 and .f32.
    //  * _G / _S use the ".global" / ".shared" space strings; _GEN uses an empty
    //    space string.
    //  * _GEN_*_USE_G match the generic-address fragment but print ".global",
    //    under the useAtomRedG32forGen32 / useAtomRedG64forGen64 predicate.
    //  * The .f32 forms use f32imm/fpimm for the immediate variant and are all
    //    gated by hasAtomAddF32; no _USE_G form is defined for .f32 here.
    968 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
    969   atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
    970 defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
    971   atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
    972 defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
    973   atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
    974 defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    975   ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
    976 
    977 defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
    978   atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
    979 defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
    980   atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
    981 defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
    982   atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
    983 defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
    984   ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
    985 
    986 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
    987   atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
    988 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
    989   atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
    990 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
    991   atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
    992 
    993 // atom_sub
    994 
    // Address-space-qualified fragments for atomic subtract: atomic_load_sub_32
    // and atomic_load_sub_64 wrapped in the GLOBAL (_g), SHARED (_s) and GENERIC
    // (_gen) address-space check classes.
    995 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    996   (atomic_load_sub_32 node:$a, node:$b)>;
    997 def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    998   (atomic_load_sub_32 node:$a, node:$b)>;
    999 def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1000   (atomic_load_sub_32 node:$a, node:$b)>;
   1001 def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1002   (atomic_load_sub_64 node:$a, node:$b)>;
   1003 def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1004   (atomic_load_sub_64 node:$a, node:$b)>;
   1005 def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1006   (atomic_load_sub_64 node:$a, node:$b)>;
   1007 
   // Atomic subtract instantiations.  These go through F_ATOMIC_2_NEG with the
   // ".add" opcode string, so the second operand is negated and then added.
   // TypeStr is the bare width ("32"/"64") because the multiclass prepends ".s"
   // and ".u" itself.  _GEN_*_USE_G match the generic-address fragment but
   // print ".global".
   1008 defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
   1009   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
   1010 defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
   1011   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
   1012 defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
   1013   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
   1014 defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
   1015   ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
   1016 defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
   1017   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
   1018 defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
   1019   atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
   1020 defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
   1021   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
   1022 defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
   1023   ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
   1024 
   1025 // atom_swap
   1026 
   // Address-space-qualified fragments for atomic swap: atomic_swap_32 and
   // atomic_swap_64 wrapped in the GLOBAL (_g), SHARED (_s) and GENERIC (_gen)
   // address-space check classes.
   1027 def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1028   (atomic_swap_32 node:$a, node:$b)>;
   1029 def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1030   (atomic_swap_32 node:$a, node:$b)>;
   1031 def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1032   (atomic_swap_32 node:$a, node:$b)>;
   1033 def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1034   (atomic_swap_64 node:$a, node:$b)>;
   1035 def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1036   (atomic_swap_64 node:$a, node:$b)>;
   1037 def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1038   (atomic_swap_64 node:$a, node:$b)>;
   1039 
   // Atomic swap instantiations via F_ATOMIC_2: opcode string ".exch" with the
   // untyped ".b32" / ".b64" type strings.  _GEN uses an empty space string;
   // _GEN_*_USE_G match the generic-address fragment but print ".global".
   1040 defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
   1041   atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
   1042 defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
   1043   atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
   1044 defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
   1045   atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
   1046 defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1047   ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1048 defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
   1049   atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
   1050 defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
   1051   atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
   1052 defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
   1053   atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
   1054 defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1055   ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1056 
   1057 // atom_max
   1058 
   // Address-space-qualified fragments for atomic max.  atomic_load_max_{32,64}
   // and atomic_load_umax_{32,64} are each wrapped in the GLOBAL (_g), SHARED
   // (_s) and GENERIC (_gen) address-space check classes.
   1059 def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
   1060   , (atomic_load_max_32 node:$a, node:$b)>;
   1061 def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1062   (atomic_load_max_32 node:$a, node:$b)>;
   1063 def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1064   (atomic_load_max_32 node:$a, node:$b)>;
   1065 def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
   1066   , (atomic_load_max_64 node:$a, node:$b)>;
   1067 def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1068   (atomic_load_max_64 node:$a, node:$b)>;
   1069 def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1070   (atomic_load_max_64 node:$a, node:$b)>;
   1071 def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1072   (atomic_load_umax_32 node:$a, node:$b)>;
   1073 def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1074   (atomic_load_umax_32 node:$a, node:$b)>;
   1075 def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1076   (atomic_load_umax_32 node:$a, node:$b)>;
   1077 def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1078   (atomic_load_umax_64 node:$a, node:$b)>;
   1079 def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1080   (atomic_load_umax_64 node:$a, node:$b)>;
   1081 def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1082   (atomic_load_umax_64 node:$a, node:$b)>;
   1083 
   // Atomic max instantiations via F_ATOMIC_2 with the ".max" opcode string.
   // The LOAD_MAX forms use the ".s32" / ".s64" type strings and the LOAD_UMAX
   // forms use ".u32" / ".u64".  _GEN uses an empty space string;
   // _GEN_*_USE_G match the generic-address fragment but print ".global".
   1084 defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
   1085   ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
   1086 defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
   1087   ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
   1088 defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
   1089   atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
   1090 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1091   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1092 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
   1093   ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
   1094 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
   1095   ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
   1096 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
   1097   atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
   1098 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1099   ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1100 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1101   ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
   1102 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
   1103   ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
   1104 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
   1105   atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
   1106 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1107   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1108 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   1109   ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
   1110 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
   1111   ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
   1112 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
   1113   atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
   1114 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1115   ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1116 
   1117 // atom_min
   1118 
   // Address-space-qualified fragments for atomic min.  atomic_load_min_{32,64}
   // and atomic_load_umin_{32,64} are each wrapped in the GLOBAL (_g), SHARED
   // (_s) and GENERIC (_gen) address-space check classes.
   1119 def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1120   (atomic_load_min_32 node:$a, node:$b)>;
   1121 def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1122   (atomic_load_min_32 node:$a, node:$b)>;
   1123 def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1124   (atomic_load_min_32 node:$a, node:$b)>;
   1125 def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1126   (atomic_load_min_64 node:$a, node:$b)>;
   1127 def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1128   (atomic_load_min_64 node:$a, node:$b)>;
   1129 def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1130   (atomic_load_min_64 node:$a, node:$b)>;
   1131 def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1132   (atomic_load_umin_32 node:$a, node:$b)>;
   1133 def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1134   (atomic_load_umin_32 node:$a, node:$b)>;
   1135 def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1136   (atomic_load_umin_32 node:$a, node:$b)>;
   1137 def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1138   (atomic_load_umin_64 node:$a, node:$b)>;
   1139 def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1140   (atomic_load_umin_64 node:$a, node:$b)>;
   1141 def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1142   (atomic_load_umin_64 node:$a, node:$b)>;
   1143 
   // Atomic min instantiations via F_ATOMIC_2 with the ".min" opcode string.
   // The LOAD_MIN forms use the ".s32" / ".s64" type strings and the LOAD_UMIN
   // forms use ".u32" / ".u64".  _GEN uses an empty space string;
   // _GEN_*_USE_G match the generic-address fragment but print ".global".
   1144 defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
   1145   ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
   1146 defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
   1147   ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
   1148 defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
   1149   atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
   1150 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1151   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1152 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
   1153   ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
   1154 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
   1155   ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
   1156 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
   1157   atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
   1158 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1159   ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1160 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1161   ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
   1162 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
   1163   ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
   1164 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
   1165   atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
   1166 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   1167   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1168 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   1169   ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
   1170 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
   1171   ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
   1172 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
   1173   atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
   1174 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
   1175   ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1176 
   1177 // atom_inc  atom_dec
   1178 
   // Address-space-qualified fragments for atomic inc/dec.  These wrap the
   // int_nvvm_atomic_load_inc_32 and int_nvvm_atomic_load_dec_32 intrinsics in
   // the GLOBAL (_g), SHARED (_s) and GENERIC (_gen) address-space check
   // classes.  Only 32-bit forms are defined here.
   1179 def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1180   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1181 def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1182   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1183 def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1184   (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
   1185 def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1186   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1187 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1188   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1189 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1190   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
   1191 
   // Atomic inc/dec instantiations via F_ATOMIC_2: opcode strings ".inc" and
   // ".dec" with the ".u32" type string.  _GEN uses an empty space string;
   // _GEN_32_USE_G matches the generic-address fragment but prints ".global".
   1192 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
   1193   atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
   1194 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
   1195   atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
   1196 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
   1197   atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
   1198 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1199   ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1200 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
   1201   atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
   1202 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
   1203   atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
   1204 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
   1205   atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
   1206 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   1207   ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1208 
   1209 // atom_and
        //
        // Pattern fragments over the atomic_load_and_32 / atomic_load_and_64
        // nodes, one per checker class (ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK,
        // ATOMIC_GENERIC_CHK) and width.
        // NOTE(review): the *_CHK classes are declared elsewhere; presumably they
        // restrict a match to one address space -- confirm.
   1210 
   1211 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1212   (atomic_load_and_32 node:$a, node:$b)>;
   1213 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1214   (atomic_load_and_32 node:$a, node:$b)>;
   1215 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1216   (atomic_load_and_32 node:$a, node:$b)>;
   1217 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1218   (atomic_load_and_64 node:$a, node:$b)>;
   1219 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1220   (atomic_load_and_64 node:$a, node:$b)>;
   1221 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1222   (atomic_load_and_64 node:$a, node:$b)>;
   1223 
        // F_ATOMIC_2 instantiations with the ".and" operation string. The type
        // string is untyped-bits (".b32" / ".b64"), the register class and
        // immediate type follow the width, and each entry has its own predicate.
   1224 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
   1225   atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
   1226 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
   1227   atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
   1228 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
   1229   atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
   1230 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1231   ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1232 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
   1233   atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
   1234 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
   1235   atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
   1236 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
   1237   atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
   1238 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1239   ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1240 
   1241 // atom_or
        //
        // Pattern fragments over the atomic_load_or_32 / atomic_load_or_64 nodes,
        // one per checker class (ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK,
        // ATOMIC_GENERIC_CHK) and width.
        // NOTE(review): the *_CHK classes are declared elsewhere; presumably they
        // restrict a match to one address space -- confirm.
   1242 
   1243 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1244   (atomic_load_or_32 node:$a, node:$b)>;
   1245 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1246   (atomic_load_or_32 node:$a, node:$b)>;
   1247 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1248   (atomic_load_or_32 node:$a, node:$b)>;
   1249 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1250   (atomic_load_or_64 node:$a, node:$b)>;
   1251 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1252   (atomic_load_or_64 node:$a, node:$b)>;
   1253 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1254   (atomic_load_or_64 node:$a, node:$b)>;
   1255 
        // F_ATOMIC_2 instantiations with the ".or" operation string and the
        // ".b32" / ".b64" type strings.
        // Note: within each width the entries are listed G, GEN, GEN_USE_G, S,
        // whereas the and/xor sections list them G, S, GEN, GEN_USE_G. The same
        // four variants per width are defined either way.
   1256 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
   1257   atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
   1258 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
   1259   atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
   1260 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1261   ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1262 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
   1263   atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
   1264 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
   1265   atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
   1266 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
   1267   atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
   1268 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1269   ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1270 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
   1271   atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
   1272 
   1273 // atom_xor
        //
        // Pattern fragments over the atomic_load_xor_32 / atomic_load_xor_64
        // nodes, one per checker class (ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK,
        // ATOMIC_GENERIC_CHK) and width.
        // NOTE(review): the *_CHK classes are declared elsewhere; presumably they
        // restrict a match to one address space -- confirm.
   1274 
   1275 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1276   (atomic_load_xor_32 node:$a, node:$b)>;
   1277 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1278   (atomic_load_xor_32 node:$a, node:$b)>;
   1279 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1280   (atomic_load_xor_32 node:$a, node:$b)>;
   1281 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   1282   (atomic_load_xor_64 node:$a, node:$b)>;
   1283 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   1284   (atomic_load_xor_64 node:$a, node:$b)>;
   1285 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   1286   (atomic_load_xor_64 node:$a, node:$b)>;
   1287 
        // F_ATOMIC_2 instantiations with the ".xor" operation string and the
        // ".b32" / ".b64" type strings. The *_USE_G entries pair the generic
        // fragment with the ".global" space string under the
        // useAtomRedG32forGen32 / useAtomRedG64forGen64 predicates.
   1288 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
   1289   atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
   1290 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
   1291   atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
   1292 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
   1293   atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
   1294 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   1295   ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
   1296 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
   1297   atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
   1298 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
   1299   atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
   1300 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
   1301   atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
   1302 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
   1303   ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
   1304 
   1305 // atom_cas
        //
        // Compare-and-swap takes three operands ($a, $b, $c), so these fragments
        // wrap atomic_cmp_swap_32 / atomic_cmp_swap_64 with a three-node operand
        // list, and the instructions are built with F_ATOMIC_3 rather than
        // F_ATOMIC_2.
        // NOTE(review): the *_CHK classes and F_ATOMIC_3 are declared elsewhere;
        // confirm their exact semantics there.
   1306 
   1307 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
   1308   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1309 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
   1310   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1311 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
   1312   (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
   1313 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
   1314   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1315 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
   1316   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1317 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
   1318   (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
   1319 
        // F_ATOMIC_3 instantiations with the ".cas" operation string and the
        // ".b32" / ".b64" type strings. Unlike the F_ATOMIC_2 uses in this file,
        // no separate `imm` node argument is passed: the arguments are register
        // class, space string, type string, op string, fragment, immediate type,
        // predicate.
   1320 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
   1321   atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
   1322 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
   1323   atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
   1324 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
   1325   atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
   1326 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
   1327   ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
   1328 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
   1329   atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
   1330 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
   1331   atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
   1332 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
   1333   atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
   1334 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
   1335   ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
   1336 
   1337 
   1338 //-----------------------------------
   1339 // Read Special Registers
   1340 //-----------------------------------
        // F_SREG describes an instruction with no inputs and a single output
        // register $dst.
        //   OpStr       - assembly string emitted for the instruction.
        //   regclassOut - register class of the $dst result.
        //   IntOp       - intrinsic matched, with no operands, by the selection
        //                 pattern; its result is assigned to $dst.
   1341 class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
   1342       NVPTXInst<(outs regclassOut:$dst), (ins),
   1343                OpStr,
   1344          [(set regclassOut:$dst, (IntOp))]>;
   1345 
        // F_SREG instantiations. Each one emits a "mov.u32" from a named PTX
        // special register into an Int32Regs result and is selected for the
        // matching int_nvvm_read_ptx_sreg_* intrinsic.
        //
        // %tid.{x,y,z}
   1346 def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
   1347   int_nvvm_read_ptx_sreg_tid_x>;
   1348 def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
   1349   int_nvvm_read_ptx_sreg_tid_y>;
   1350 def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
   1351   int_nvvm_read_ptx_sreg_tid_z>;
   1352 
        // %ntid.{x,y,z}
   1353 def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
   1354   int_nvvm_read_ptx_sreg_ntid_x>;
   1355 def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
   1356   int_nvvm_read_ptx_sreg_ntid_y>;
   1357 def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
   1358   int_nvvm_read_ptx_sreg_ntid_z>;
   1359 
        // %ctaid.{x,y,z}
   1360 def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
   1361   int_nvvm_read_ptx_sreg_ctaid_x>;
   1362 def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
   1363   int_nvvm_read_ptx_sreg_ctaid_y>;
   1364 def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
   1365   int_nvvm_read_ptx_sreg_ctaid_z>;
   1366 
        // %nctaid.{x,y,z}
   1367 def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
   1368   int_nvvm_read_ptx_sreg_nctaid_x>;
   1369 def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
   1370   int_nvvm_read_ptx_sreg_nctaid_y>;
   1371 def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
   1372   int_nvvm_read_ptx_sreg_nctaid_z>;
   1373 
        // The warp-size read moves the symbol WARP_SZ; unlike the entries above,
        // its source operand is not written with a '%' prefix.
   1374 def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
   1375   int_nvvm_read_ptx_sreg_warpsize>;
   1376 
   1377 
   1378 //-----------------------------------
   1379 // Support for ldu on sm_20 or later
   1380 //-----------------------------------
   1381 
   1382 // Scalar
        // LDU_G defines five scalar "ldu.global." instructions that differ only in
        // the kind of address operand ($src).
        //   TyStr    - remainder of the assembly string, appended to "ldu.global.".
        //   regclass - register class of the single $result output.
        // Every record has an empty selection-pattern list and is gated on the
        // hasLDU predicate.
        // NOTE(review): with no patterns here, these are presumably selected from
        // C++ instruction-selection code -- confirm.
   1383 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
          // $src is an Int32Regs operand.
   1384   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
   1385                !strconcat("ldu.global.", TyStr),
   1386                       []>, Requires<[hasLDU]>;
          // $src is an Int64Regs operand.
   1387   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
   1388                !strconcat("ldu.global.", TyStr),
   1389                         []>, Requires<[hasLDU]>;
          // $src is an imemAny operand.
   1390  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
   1391                !strconcat("ldu.global.", TyStr),
   1392                       []>, Requires<[hasLDU]>;
          // $src is a MEMri operand.
   1393  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
   1394                !strconcat("ldu.global.", TyStr),
   1395                       []>, Requires<[hasLDU]>;
          // $src is a MEMri64 operand.
   1396  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
   1397                !strconcat("ldu.global.", TyStr),
   1398                         []>, Requires<[hasLDU]>;
   1399 }
   1400 
        // Scalar ldu instantiations, one per element type. The i8 form uses the
        // "u8" type string but an Int16Regs result, and the p32 / p64 forms use
        // the same type strings and register classes as i32 / i64.
   1401 defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
   1402 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
   1403 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
   1404 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
   1405 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
   1406 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
   1407 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
   1408 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
   1409 
   1410 // vector
   1411 
   1412 // Elementized vector ldu
        // VLDU_G_ELE_V2 defines two-result "ldu.global." instructions: the loaded
        // elements land in separate registers $dst1 and $dst2 of class regclass.
        //   TyStr    - remainder of the assembly string, appended to "ldu.global.".
        //   regclass - register class shared by both outputs.
        // The five records differ only in the address operand kind (Int32Regs,
        // Int64Regs, MEMri, MEMri64, imemAny). All have empty pattern lists and,
        // unlike the scalar LDU_G records, carry no Requires<> predicate.
   1413 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
   1414  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1415                      (ins Int32Regs:$src),
   1416                      !strconcat("ldu.global.", TyStr), []>;
   1417  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1418                      (ins Int64Regs:$src),
   1419                      !strconcat("ldu.global.", TyStr), []>;
   1420  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1421                      (ins MEMri:$src),
   1422                      !strconcat("ldu.global.", TyStr), []>;
   1423  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1424                      (ins MEMri64:$src),
   1425                      !strconcat("ldu.global.", TyStr), []>;
   1426  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1427                      (ins imemAny:$src),
   1428                      !strconcat("ldu.global.", TyStr), []>;
   1429 }
   1430 
        // VLDU_G_ELE_V4 is the four-result counterpart of VLDU_G_ELE_V2: outputs
        // $dst1..$dst4 all use regclass, and TyStr is appended to "ldu.global.".
        // The five records differ only in the address operand kind; all have
        // empty pattern lists and no Requires<> predicate.
   1431 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
   1432  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1433                             regclass:$dst4), (ins Int32Regs:$src), 
   1434                !strconcat("ldu.global.", TyStr), []>;
   1435  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1436                             regclass:$dst4), (ins Int64Regs:$src), 
   1437                !strconcat("ldu.global.", TyStr), []>;
   1438  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1439                             regclass:$dst4), (ins MEMri:$src), 
   1440                !strconcat("ldu.global.", TyStr), []>;
   1441  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1442                             regclass:$dst4), (ins MEMri64:$src), 
   1443                !strconcat("ldu.global.", TyStr), []>;
   1444  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1445                             regclass:$dst4), (ins imemAny:$src), 
   1446                !strconcat("ldu.global.", TyStr), []>;
   1447 }
   1448 
        // Vector ldu instantiations. "{{" and "}}" in the strings produce the
        // literal braces around the destination list. v2 forms exist for i8, i16,
        // i32, f32, i64 and f64; v4 forms exist for i8, i16, i32 and f32 only.
        // The i8 forms use the "u8" type string with Int16Regs results.
   1449 defm INT_PTX_LDU_G_v2i8_ELE
   1450   : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
   1451 defm INT_PTX_LDU_G_v2i16_ELE
   1452   : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
   1453 defm INT_PTX_LDU_G_v2i32_ELE
   1454   : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
   1455 defm INT_PTX_LDU_G_v2f32_ELE
   1456   : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
   1457 defm INT_PTX_LDU_G_v2i64_ELE
   1458   : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
   1459 defm INT_PTX_LDU_G_v2f64_ELE
   1460   : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
   1461 defm INT_PTX_LDU_G_v4i8_ELE
   1462   : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1463 defm INT_PTX_LDU_G_v4i16_ELE
   1464   : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1465     Int16Regs>;
   1466 defm INT_PTX_LDU_G_v4i32_ELE
   1467   : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1468     Int32Regs>;
   1469 defm INT_PTX_LDU_G_v4f32_ELE
   1470   : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
   1471     Float32Regs>;
   1472 
   1473 
   1474 //-----------------------------------
   1475 // Support for ldg on sm_35 or later 
   1476 //-----------------------------------
   1477 
        // LDG_G defines five scalar "ld.global.nc." instructions that differ only
        // in the kind of address operand ($src).
        //   TyStr    - remainder of the assembly string, appended to
        //              "ld.global.nc.".
        //   regclass - register class of the single $result output.
        // Every record has an empty selection-pattern list and is gated on the
        // hasLDG predicate.
        // NOTE(review): with no patterns here, these are presumably selected from
        // C++ instruction-selection code -- confirm.
   1478 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
          // $src is an Int32Regs operand.
   1479   def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
   1480                !strconcat("ld.global.nc.", TyStr),
   1481                       []>, Requires<[hasLDG]>;
          // $src is an Int64Regs operand.
   1482   def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
   1483                !strconcat("ld.global.nc.", TyStr),
   1484                         []>, Requires<[hasLDG]>;
          // $src is an imemAny operand.
   1485  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
   1486                !strconcat("ld.global.nc.", TyStr),
   1487                       []>, Requires<[hasLDG]>;
          // $src is a MEMri operand.
   1488  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
   1489                !strconcat("ld.global.nc.", TyStr),
   1490                       []>, Requires<[hasLDG]>;
          // $src is a MEMri64 operand.
   1491  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
   1492                !strconcat("ld.global.nc.", TyStr),
   1493                         []>, Requires<[hasLDG]>;
   1494 }
   1495 
        // Scalar ldg instantiations, one per element type. The i8 form uses the
        // "u8" type string but an Int16Regs result, and the p32 / p64 forms use
        // the same type strings and register classes as i32 / i64.
   1496 defm INT_PTX_LDG_GLOBAL_i8
   1497   : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
   1498 defm INT_PTX_LDG_GLOBAL_i16
   1499   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
   1500 defm INT_PTX_LDG_GLOBAL_i32
   1501   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
   1502 defm INT_PTX_LDG_GLOBAL_i64
   1503   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
   1504 defm INT_PTX_LDG_GLOBAL_f32
   1505   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
   1506 defm INT_PTX_LDG_GLOBAL_f64
   1507   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
   1508 defm INT_PTX_LDG_GLOBAL_p32
   1509   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
   1510 defm INT_PTX_LDG_GLOBAL_p64
   1511   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
   1512 
   1513 // vector
   1514 
   1515 // Elementized vector ldg 
        // VLDG_G_ELE_V2 defines two-result "ld.global.nc." instructions: the
        // loaded elements land in separate registers $dst1 and $dst2 of class
        // regclass.
        //   TyStr    - remainder of the assembly string, appended to
        //              "ld.global.nc.".
        //   regclass - register class shared by both outputs.
        // The five records differ only in the address operand kind (Int32Regs,
        // Int64Regs, MEMri, MEMri64, imemAny). All have empty pattern lists and,
        // unlike the scalar LDG_G records, carry no Requires<> predicate.
   1516 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
   1517  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1518                      (ins Int32Regs:$src),
   1519                      !strconcat("ld.global.nc.", TyStr), []>;
   1520  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1521                      (ins Int64Regs:$src),
   1522                      !strconcat("ld.global.nc.", TyStr), []>;
   1523  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1524                      (ins MEMri:$src),
   1525                      !strconcat("ld.global.nc.", TyStr), []>;
   1526  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1527                      (ins MEMri64:$src),
   1528                      !strconcat("ld.global.nc.", TyStr), []>;
   1529  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
   1530                      (ins imemAny:$src),
   1531                      !strconcat("ld.global.nc.", TyStr), []>;
   1532 }
   1533 
        // VLDG_G_ELE_V4 is the four-result counterpart of VLDG_G_ELE_V2: outputs
        // $dst1..$dst4 all use regclass, and TyStr is appended to
        // "ld.global.nc.". The five records differ only in the address operand
        // kind; all have empty pattern lists and no Requires<> predicate.
   1534 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 
   1535   def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1536                               regclass:$dst4), (ins Int32Regs:$src), 
   1537                !strconcat("ld.global.nc.", TyStr), []>;
   1538   def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1539                                regclass:$dst4), (ins Int64Regs:$src), 
   1540                !strconcat("ld.global.nc.", TyStr), []>;
   1541   def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1542                               regclass:$dst4), (ins MEMri:$src), 
   1543                !strconcat("ld.global.nc.", TyStr), []>;
   1544   def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1545                               regclass:$dst4), (ins MEMri64:$src), 
   1546                !strconcat("ld.global.nc.", TyStr), []>;
   1547   def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
   1548                              regclass:$dst4), (ins imemAny:$src), 
   1549                !strconcat("ld.global.nc.", TyStr), []>;
   1550 }
   1551 
   1552 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
        // Vector ldg instantiations. "{{" and "}}" in the strings produce the
        // literal braces around the destination list. v2 forms exist for i8, i16,
        // i32, f32, i64 and f64; v4 forms exist for i8, i16, i32 and f32 only.
        // The i8 forms currently use the "u8" type string with Int16Regs results
        // (the subject of the FIXME above).
   1553 defm INT_PTX_LDG_G_v2i8_ELE
   1554   : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
   1555 defm INT_PTX_LDG_G_v2i16_ELE
   1556   : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
   1557 defm INT_PTX_LDG_G_v2i32_ELE
   1558   : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
   1559 defm INT_PTX_LDG_G_v2f32_ELE
   1560   : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
   1561 defm INT_PTX_LDG_G_v2i64_ELE
   1562   : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
   1563 defm INT_PTX_LDG_G_v2f64_ELE
   1564   : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
   1565 defm INT_PTX_LDG_G_v4i8_ELE
   1566   : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1567 defm INT_PTX_LDG_G_v4i16_ELE
   1568   : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
   1569 defm INT_PTX_LDG_G_v4i32_ELE
   1570   : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
   1571 defm INT_PTX_LDG_G_v4f32_ELE
   1572   : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
   1573 
   1574 
   1575 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
           // Address conversion for the Intrin intrinsic, in 32- and 64-bit forms.
           //   Str    - address-space name spliced into the "cvta.<Str>.uNN"
           //            mnemonic.
           //   Intrin - intrinsic matched by every record's selection pattern.
           // The _yes / _yes_64 records emit a cvta instruction and require
           // hasGenericLdSt.
   1576    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1577           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
   1578       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
   1579    Requires<[hasGenericLdSt]>;
   1580    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1581           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
   1582       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
   1583    Requires<[hasGenericLdSt]>;
   1584 
   1585 // @TODO: Are these actually needed?  I believe global addresses will be copied
   1586 // to register values anyway.
   1587    /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
   1588           !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
   1589       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
   1590       Requires<[hasGenericLdSt]>;
   1591    def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
   1592           !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
   1593       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
   1594       Requires<[hasGenericLdSt]>;*/
   1595 
           // The _no / _no_64 records match the same intrinsic but emit a plain
           // register move and carry no Requires<> predicate.
           // NOTE(review): presumably these are the fallback when hasGenericLdSt
           // does not hold -- confirm how the two forms are disambiguated.
   1596    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1597           "mov.u32 \t$result, $src;",
   1598       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   1599    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1600           "mov.u64 \t$result, $src;",
   1601       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   1602 
   1603 // @TODO: Are these actually needed?  I believe global addresses will be copied
   1604 // to register values anyway.
   1605    /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
   1606           "mov.u32 \t$result, $src;",
   1607       [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
   1608    def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
   1609           "mov.u64 \t$result, $src;",
   1610       [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
   1611 }
   1612 
   1613 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
           // Counterpart of NG_TO_G that emits "cvta.to.<Str>.uNN" instead of
           // "cvta.<Str>.uNN".
           //   Str    - address-space name spliced into the mnemonic.
           //   Intrin - intrinsic matched by every record's selection pattern.
           // _yes / _yes_64 emit cvta.to and require hasGenericLdSt; _no / _no_64
           // emit a plain register move and carry no Requires<> predicate.
   1614    def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1615           !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
   1616       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
   1617    Requires<[hasGenericLdSt]>;
   1618    def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1619           !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
   1620       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
   1621    Requires<[hasGenericLdSt]>;
   1622    def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
   1623           "mov.u32 \t$result, $src;",
   1624       [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   1625    def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
   1626           "mov.u64 \t$result, $src;",
   1627       [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   1628 }
   1629 
        // NG_TO_G instantiations for the int_nvvm_ptr_*_to_gen intrinsics. Note
        // that the constant space is spelled "const" in the mnemonic but
        // "constant" in the intrinsic name.
   1630 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
   1631 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
   1632 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
   1633 defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
   1634 
        // G_TO_NG instantiations for the int_nvvm_ptr_gen_to_* intrinsics.
   1635 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
   1636 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
   1637 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
   1638 defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
   1639 
   1640 
   1641 // nvvm.ptr.gen.to.param
        // Selected for int_nvvm_ptr_gen_to_param on a 32-bit or 64-bit register
        // operand. Both forms emit only a register-to-register "mov" of the same
        // width; no cvta instruction is generated and no predicate is required.
   1642 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
   1643   (ins Int32Regs:$src),
   1644                         "mov.u32 \t$result, $src;",
   1645                               [(set Int32Regs:$result,
   1646                                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
   1647 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
   1648   (ins Int64Regs:$src),
   1649                         "mov.u64 \t$result, $src;",
   1650                               [(set Int64Regs:$result,
   1651                                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
   1652 
   1653 
   1654 // nvvm.move intrinsics
        // Each record matches one int_nvvm_move_* intrinsic and emits a single
        // "mov" from $s to $r in the same register class. The integer forms use
        // the untyped ".b16" / ".b32" / ".b64" suffixes, the floating-point forms
        // use ".f32" / ".f64", and the two pointer forms (both matching
        // int_nvvm_move_ptr) use ".u32" / ".u64".
   1655 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
   1656                              "mov.b16 \t$r, $s;",
   1657                              [(set Int16Regs:$r,
   1658                                (int_nvvm_move_i16 Int16Regs:$s))]>;
   1659 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
   1660                              "mov.b32 \t$r, $s;",
   1661                              [(set Int32Regs:$r,
   1662                                (int_nvvm_move_i32 Int32Regs:$s))]>;
   1663 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
   1664                              "mov.b64 \t$r, $s;",
   1665                              [(set Int64Regs:$r,
   1666                                (int_nvvm_move_i64 Int64Regs:$s))]>;
   1667 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
   1668                              "mov.f32 \t$r, $s;",
   1669                              [(set Float32Regs:$r,
   1670                                (int_nvvm_move_float Float32Regs:$s))]>;
   1671 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
   1672                              "mov.f64 \t$r, $s;",
   1673                              [(set Float64Regs:$r,
   1674                                (int_nvvm_move_double Float64Regs:$s))]>;
   1675 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
   1676                              "mov.u32 \t$r, $s;",
   1677                              [(set Int32Regs:$r,
   1678                                (int_nvvm_move_ptr Int32Regs:$s))]>;
   1679 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
   1680                              "mov.u64 \t$r, $s;",
   1681                              [(set Int64Regs:$r,
   1682                                (int_nvvm_move_ptr Int64Regs:$s))]>;
   1683 
   1684 // @TODO: Are these actually needed, or will we always just see symbols
   1685 // copied to registers first?
   1686 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
   1687                              "mov.u32 \t$r, $s;",
   1688                              [(set Int32Regs:$r,
   1689                              (int_nvvm_move_ptr texternalsym:$s))]>;
   1690 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
   1691                              "mov.u64 \t$r, $s;",
   1692                              [(set Int64Regs:$r,
   1693                              (int_nvvm_move_ptr texternalsym:$s))]>;*/
   1694 
   1695 
   1696 // MoveParam        %r1, param
   1697 // ptr_local_to_gen %r2, %r1
   1698 // ptr_gen_to_local %r3, %r2
   1699 // ->
   1700 // mov %r1, param
   1701 
   1702 // @TODO: Revisit this.  There is a type
   1703 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
   1704 // instructions are not currently defined. However, we can use the ptr
   1705 // variants and the asm printer will do the right thing.
        // The two patterns below implement the fold sketched above for i64 and
        // i32 results: a local->generic->local round trip applied to a MoveParam
        // of an external symbol is replaced by nvvm_move_ptr64 / nvvm_move_ptr32
        // taking the symbol directly.
   1706 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
   1707                 (MoveParam texternalsym:$src)))),
   1708                (nvvm_move_ptr64  texternalsym:$src)>;
   1709 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
   1710                 (MoveParam texternalsym:$src)))),
   1711                (nvvm_move_ptr32  texternalsym:$src)>;
   1712 
        // Moves an imem operand into a 64-bit register with "mov.u64". The
        // pattern list is empty, so nothing in this file selects it.
        // NOTE(review): the name suggests it materialises texture/surface
        // handles, presumably from C++ selection code -- confirm.
   1713 def texsurf_handles
   1714   : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
   1715               "mov.u64 \t$result, $src;", []>;
   1716 
   1717 //-----------------------------------
   1718 // Compiler Error Warn
   1719 // - Just ignore them in codegen
   1720 //-----------------------------------
        // Each record matches int_nvvm_compiler_warn or int_nvvm_compiler_error
        // with a 32-bit or 64-bit register operand $a. There are no outputs, and
        // the assembly string is only a "//" comment naming the intrinsic; $a
        // does not appear in the emitted text.
   1721 
   1722 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
   1723                 "// llvm.nvvm.compiler.warn()",
   1724                 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
   1725 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
   1726                 "// llvm.nvvm.compiler.warn()",
   1727                 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
   1728 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
   1729                 "// llvm.nvvm.compiler.error()",
   1730                 [(int_nvvm_compiler_error Int32Regs:$a)]>;
   1731 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
   1732                 "// llvm.nvvm.compiler.error()",
   1733                 [(int_nvvm_compiler_error Int64Regs:$a)]>;
   1734 
   1735 
   1736 // isspacep
        // One record per address space (const, global, local, shared) and operand
        // width (Int32Regs / Int64Regs). Each matches the corresponding
        // int_nvvm_isspacep_* intrinsic, emits "isspacep.<space>", and writes its
        // result to an Int1Regs register $d. Only the two const records carry a
        // predicate (hasPTX31); the others have no Requires<> clause.
   1737 
   1738 def ISSPACEP_CONST_32
   1739   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   1740               "isspacep.const \t$d, $a;",
   1741               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
   1742     Requires<[hasPTX31]>;
   1743 def ISSPACEP_CONST_64
   1744   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   1745               "isspacep.const \t$d, $a;",
   1746               [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
   1747     Requires<[hasPTX31]>;
   1748 def ISSPACEP_GLOBAL_32
   1749   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   1750               "isspacep.global \t$d, $a;",
   1751               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
   1752 def ISSPACEP_GLOBAL_64
   1753   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   1754               "isspacep.global \t$d, $a;",
   1755               [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
   1756 def ISSPACEP_LOCAL_32
   1757   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   1758               "isspacep.local \t$d, $a;",
   1759               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
   1760 def ISSPACEP_LOCAL_64
   1761   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   1762               "isspacep.local \t$d, $a;",
   1763               [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
   1764 def ISSPACEP_SHARED_32
   1765   : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
   1766               "isspacep.shared \t$d, $a;",
   1767               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
   1768 def ISSPACEP_SHARED_64
   1769   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   1770               "isspacep.shared \t$d, $a;",
   1771               [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
   1772 
   1773 
   1774 // Special register reads
        // MOV_SPECIAL prints `mov.b32 $d, $r;`, copying a SpecialRegs operand
        // into a 32-bit register. It has no pattern of its own; the anonymous
        // patterns below map each int_nvvm_read_ptx_sreg_envregN intrinsic
        // (N = 0..31) to MOV_SPECIAL applied to the matching ENVREGN register.
   1775 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
   1776                             (ins SpecialRegs:$r),
   1777                             "mov.b32\t$d, $r;", []>;
   1778 
   1779 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
   1780 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
   1781 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
   1782 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
   1783 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
   1784 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
   1785 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
   1786 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
   1787 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
   1788 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
   1789 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
   1790 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
   1791 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
   1792 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
   1793 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
   1794 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
   1795 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
   1796 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
   1797 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
   1798 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
   1799 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
   1800 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
   1801 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
   1802 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
   1803 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
   1804 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
   1805 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
   1806 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
   1807 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
   1808 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
   1809 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
   1810 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
   1811 
   1812 
   1813 // rotate builtin support
        // Lowering of int_nvvm_rotate_b32 (32-bit rotate).
        //  - With hasHWROT32: ROTATE_B32_HW_{IMM,REG} print a single
        //    shf.l.wrap.b32 with $src passed as both data inputs; the amount is
        //    either an immediate or a register.
        //  - With noHWROT32: the two anonymous patterns hand off to ROT32imm_sw
        //    (passing both $amt and SUB_FRM_32 of $amt) and ROTL32reg_sw, which
        //    are defined outside this section.
   1814 
   1815 def ROTATE_B32_HW_IMM
   1816   : NVPTXInst<(outs Int32Regs:$dst),
   1817               (ins  Int32Regs:$src, i32imm:$amt),
   1818               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
   1819               [(set Int32Regs:$dst,
   1820                  (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
   1821               Requires<[hasHWROT32]> ;
   1822 
   1823 def ROTATE_B32_HW_REG
   1824   : NVPTXInst<(outs Int32Regs:$dst),
   1825               (ins  Int32Regs:$src, Int32Regs:$amt),
   1826               "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
   1827               [(set Int32Regs:$dst,
   1828                  (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
   1829               Requires<[hasHWROT32]> ;
   1830 
   1831 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
   1832           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
   1833       Requires<[noHWROT32]> ;
   1834 
   1835 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
   1836           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
   1837       Requires<[noHWROT32]> ;
   1838 
   1839 def GET_LO_INT64
        // Extracts one 32-bit half of the 64-bit register $src into $dst.
        // The emitted text declares a scratch register %dummy and unpacks $src
        // with mov.b64 into the vector {$dst,%dummy}; $dst is the FIRST element,
        // %dummy takes the other half and is discarded.
        // No selection pattern here; it is used as a building block by the
        // swap_lo_hi_b64 and 64-bit rotate patterns in this file.
   1840   : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
   1841               !strconcat("{{\n\t",
   1842               !strconcat(".reg .b32 %dummy;\n\t",
   1843               !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
   1844         !strconcat("}}", "")))),
   1845         []> ;
   1846 
   1847 def GET_HI_INT64
        // Counterpart of GET_LO_INT64: same scratch-register sequence, but $dst
        // is the SECOND element of the unpack vector {%dummy,$dst}, so it
        // receives the other 32-bit half of $src.
        // No selection pattern here; used as a building block by the
        // swap_lo_hi_b64 and 64-bit rotate patterns in this file.
   1848   : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
   1849               !strconcat("{{\n\t",
   1850               !strconcat(".reg .b32 %dummy;\n\t",
   1851               !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
   1852         !strconcat("}}", "")))),
   1853         []> ;
   1854 
   1855 def PACK_TWO_INT32
        // Builds a 64-bit register from two 32-bit registers with a single
        // mov.b64 whose source is the vector of $lo then $hi (in that order).
        // No selection pattern; used as the final step of the swap and 64-bit
        // rotate patterns in this file.
   1856   : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
   1857               "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
   1858 
   1859 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
        // Swap the two 32-bit halves: the high half is passed as PACK's $lo
        // operand and the low half as its $hi operand.
   1860           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
   1861                           (GET_LO_INT64 Int64Regs:$src))> ;
   1862 
   1863 // funnel shift, requires >= sm_32
        // Four pattern-less records printing shf.{l,r}.wrap.b32 with two data
        // inputs ($lo, $hi) and a shift amount that is either an immediate
        // (_IMM) or a register (_REG). All are gated by hasHWROT32. They are
        // referenced by the hardware 64-bit rotate patterns in this file.
   1864 def SHF_L_WRAP_B32_IMM
   1865   : NVPTXInst<(outs Int32Regs:$dst),
   1866               (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
   1867               "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   1868     Requires<[hasHWROT32]>;
   1869 
   1870 def SHF_L_WRAP_B32_REG
   1871   : NVPTXInst<(outs Int32Regs:$dst),
   1872               (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
   1873               "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   1874     Requires<[hasHWROT32]>;
   1875 
   1876 def SHF_R_WRAP_B32_IMM
   1877   : NVPTXInst<(outs Int32Regs:$dst),
   1878               (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
   1879               "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   1880     Requires<[hasHWROT32]>;
   1881 
   1882 def SHF_R_WRAP_B32_REG
   1883   : NVPTXInst<(outs Int32Regs:$dst),
   1884               (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
   1885               "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
   1886     Requires<[hasHWROT32]>;
   1887 
   1888 // HW version of rotate 64
        // With hasHWROT32, a 64-bit rotate is assembled from two 32-bit funnel
        // shifts: $src is split with GET_LO_INT64/GET_HI_INT64, each result half
        // is produced by one SHF_*_WRAP_B32 over the two halves (operand order
        // swapped between the two), and PACK_TWO_INT32 rejoins them.
        // Left rotates use SHF_L_*, right rotates use SHF_R_*; the _IMM/_REG
        // form follows the kind of the $amt operand.
        // NOTE(review): the two halves are never exchanged, so this looks correct
        // only while the effective amount is below 32 (the PTX ISA describes
        // wrap mode as using the amount modulo 32) -- verify amounts >= 32.
   1889 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
   1890           (PACK_TWO_INT32
   1891             (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
   1892                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
   1893             (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
   1894                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
   1895       Requires<[hasHWROT32]>;
   1896 
   1897 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
   1898           (PACK_TWO_INT32
   1899             (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
   1900                                 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
   1901             (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
   1902                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
   1903       Requires<[hasHWROT32]>;
   1904 
   1905 
   1906 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
   1907           (PACK_TWO_INT32
   1908             (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
   1909                                 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
   1910             (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
   1911                                 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
   1912       Requires<[hasHWROT32]>;
   1913 
   1914 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
   1915           (PACK_TWO_INT32
   1916             (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
   1917                                 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
   1918             (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
   1919                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
   1920       Requires<[hasHWROT32]>;
   1921 
   1922 // SW version of rotate 64
        // With noHWROT32, the 64-bit rotate intrinsics are handed to the software
        // rotate records ROT64imm_sw / ROTL64reg_sw / ROTR64reg_sw, which are
        // defined outside this section.
        // For the immediate forms, ROT64imm_sw receives the rotate amount and
        // its complement. The value being rotated is 64 bits wide, so the
        // complement must be (64 - amt), i.e. SUB_FRM_64, for BOTH directions:
        //   rotate left : ($src, amt,      64 - amt)
        //   rotate right: ($src, 64 - amt, amt)
        // The left-rotate pattern previously used SUB_FRM_32 (32 - amt), which
        // is the complement for a 32-bit rotate and yields a wrong 64-bit result.
   1923 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
   1924           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
   1925       Requires<[noHWROT32]>;
   1926 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
   1927           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
   1928       Requires<[noHWROT32]>;
   1929 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
   1930           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
   1931       Requires<[noHWROT32]>;
   1932 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
   1933           (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
   1934       Requires<[noHWROT32]>;
   1935 
   1936 
   1937 //-----------------------------------
   1938 // Texture Intrinsics
   1939 //-----------------------------------
   1940 
   1941 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
   1942 // also be defined in NVPTXReplaceImageHandles.cpp
   1943 
   1944 // texmode_independent
   1945 let IsTex = 1, IsTexModeUnified = 0 in {
   1946 // Texture fetch instructions using handles
        // 1D fetches returning four f32 components in $r, $g, $b, $a.
        // $t and $s are 64-bit register operands printed first inside the
        // bracketed address (NOTE(review): presumably the texture and sampler
        // handles -- confirm), followed by the coordinate $x.
        //   _S32 / _F32 : coordinate register class (Int32Regs / Float32Regs)
        //   _LEVEL      : tex.level form with an extra $lod operand
        //   _GRAD       : tex.grad form with extra $gradx and $grady operands
        // `\\{` and `\\}` in the asm strings print literal braces.
        // All pattern lists are empty, so none is selected by a pattern here.
   1947 def TEX_1D_F32_S32
   1948   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   1949                     Float32Regs:$b, Float32Regs:$a),
   1950               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   1951               "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   1952               []>;
   1953 def TEX_1D_F32_F32
   1954   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   1955                     Float32Regs:$b, Float32Regs:$a),
   1956               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   1957               "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   1958               []>;
   1959 def TEX_1D_F32_F32_LEVEL
   1960   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   1961                     Float32Regs:$b, Float32Regs:$a),
   1962               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
   1963               "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   1964               "[$t, $s, \\{$x\\}], $lod;",
   1965               []>;
   1966 def TEX_1D_F32_F32_GRAD
   1967   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   1968                     Float32Regs:$b, Float32Regs:$a),
   1969               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   1970                    Float32Regs:$gradx, Float32Regs:$grady),
   1971               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   1972               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   1973               []>;
   1974 def TEX_1D_S32_S32
        // 1D fetches with the .s32 result type: same operand layout as the
        // TEX_1D_F32_* records, but the four results land in Int32Regs.
        // Suffixes: _S32/_F32 = coordinate register class, _LEVEL adds $lod,
        // _GRAD adds $gradx/$grady. No selection patterns are attached.
   1975   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   1976                     Int32Regs:$b, Int32Regs:$a),
   1977               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   1978               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   1979               []>;
   1980 def TEX_1D_S32_F32
   1981   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   1982                     Int32Regs:$b, Int32Regs:$a),
   1983               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   1984               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   1985               []>;
   1986 def TEX_1D_S32_F32_LEVEL
   1987   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   1988                     Int32Regs:$b, Int32Regs:$a),
   1989               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   1990                    Float32Regs:$lod),
   1991               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   1992               "[$t, $s, \\{$x\\}], $lod;",
   1993               []>;
   1994 def TEX_1D_S32_F32_GRAD
   1995   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   1996                     Int32Regs:$b, Int32Regs:$a),
   1997               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   1998                    Float32Regs:$gradx, Float32Regs:$grady),
   1999               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2000               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2001               []>;
   2002 def TEX_1D_U32_S32
        // 1D fetches with the .u32 result type. Results use the same Int32Regs
        // class as the .s32 records; only the mnemonic's result type differs.
        // Suffixes: _S32/_F32 = coordinate register class, _LEVEL adds $lod,
        // _GRAD adds $gradx/$grady. No selection patterns are attached.
   2003   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2004                     Int32Regs:$b, Int32Regs:$a),
   2005               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
   2006               "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2007               []>;
   2008 def TEX_1D_U32_F32
   2009   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2010                     Int32Regs:$b, Int32Regs:$a),
   2011               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
   2012               "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
   2013               []>;
   2014 def TEX_1D_U32_F32_LEVEL
   2015   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2016                     Int32Regs:$b, Int32Regs:$a),
   2017               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2018                    Float32Regs:$lod),
   2019               "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2020               "[$t, $s, \\{$x\\}], $lod;",
   2021               []>;
   2022 def TEX_1D_U32_F32_GRAD
   2023   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2024                     Int32Regs:$b, Int32Regs:$a),
   2025               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
   2026                    Float32Regs:$gradx, Float32Regs:$grady),
   2027               "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2028               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2029               []>;
   2030 
   2031 def TEX_1D_ARRAY_F32_S32
        // 1D-array (tex.a1d) fetches returning four f32 components.
        // Compared with the plain 1D records there is one extra 32-bit integer
        // operand $l, printed as the first element of the coordinate vector
        // (NOTE(review): presumably the array layer index -- confirm).
        // $l stays in Int32Regs even when the coordinate $x is Float32Regs.
        // Suffixes: _S32/_F32 = class of $x, _LEVEL adds $lod, _GRAD adds
        // $gradx/$grady. No selection patterns are attached.
   2032   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2033                     Float32Regs:$b, Float32Regs:$a),
   2034               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2035               "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2036               "[$t, $s, \\{$l, $x\\}];",
   2037               []>;
   2038 def TEX_1D_ARRAY_F32_F32
   2039   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2040                     Float32Regs:$b, Float32Regs:$a),
   2041               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2042               "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2043               "[$t, $s, \\{$l, $x\\}];",
   2044               []>;
   2045 def TEX_1D_ARRAY_F32_F32_LEVEL
   2046   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2047                     Float32Regs:$b, Float32Regs:$a),
   2048               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2049                    Float32Regs:$lod),
   2050               "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2051               "[$t, $s, \\{$l, $x\\}], $lod;",
   2052               []>;
   2053 def TEX_1D_ARRAY_F32_F32_GRAD
   2054   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2055                     Float32Regs:$b, Float32Regs:$a),
   2056               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2057                    Float32Regs:$gradx, Float32Regs:$grady),
   2058               "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2059               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2060               []>;
   2061 def TEX_1D_ARRAY_S32_S32
        // 1D-array (tex.a1d) fetches with the .s32 result type; the four
        // results land in Int32Regs. $l is the extra 32-bit integer operand
        // printed first in the coordinate vector, ahead of $x.
        // Suffixes: _S32/_F32 = class of $x, _LEVEL adds $lod, _GRAD adds
        // $gradx/$grady. No selection patterns are attached.
   2062   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2063                     Int32Regs:$b, Int32Regs:$a),
   2064               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2065               "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2066               "[$t, $s, \\{$l, $x\\}];",
   2067               []>;
   2068 def TEX_1D_ARRAY_S32_F32
   2069   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2070                     Int32Regs:$b, Int32Regs:$a),
   2071               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2072               "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2073               "[$t, $s, \\{$l, $x\\}];",
   2074               []>;
   2075 def TEX_1D_ARRAY_S32_F32_LEVEL
   2076   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2077                     Int32Regs:$b, Int32Regs:$a),
   2078               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2079                    Float32Regs:$lod),
   2080               "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2081               "[$t, $s, \\{$l, $x\\}], $lod;",
   2082               []>;
   2083 def TEX_1D_ARRAY_S32_F32_GRAD
   2084   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2085                     Int32Regs:$b, Int32Regs:$a),
   2086               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2087                    Float32Regs:$gradx, Float32Regs:$grady),
   2088               "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2089               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2090               []>;
   2091 def TEX_1D_ARRAY_U32_S32
        // 1D-array (tex.a1d) fetches with the .u32 result type; results use
        // Int32Regs as in the .s32 records. $l is the extra 32-bit integer
        // operand printed first in the coordinate vector, ahead of $x.
        // Suffixes: _S32/_F32 = class of $x, _LEVEL adds $lod, _GRAD adds
        // $gradx/$grady. No selection patterns are attached.
   2092   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2093                     Int32Regs:$b, Int32Regs:$a),
   2094               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   2095               "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2096               "[$t, $s, \\{$l, $x\\}];",
   2097               []>;
   2098 def TEX_1D_ARRAY_U32_F32
   2099   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2100                     Int32Regs:$b, Int32Regs:$a),
   2101               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
   2102               "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2103               "[$t, $s, \\{$l, $x\\}];",
   2104               []>;
   2105 def TEX_1D_ARRAY_U32_F32_LEVEL
   2106   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2107                     Int32Regs:$b, Int32Regs:$a),
   2108               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2109                    Float32Regs:$lod),
   2110               "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2111               "[$t, $s, \\{$l, $x\\}], $lod;",
   2112               []>;
   2113 def TEX_1D_ARRAY_U32_F32_GRAD
   2114   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2115                     Int32Regs:$b, Int32Regs:$a),
   2116               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2117                    Float32Regs:$gradx, Float32Regs:$grady),
   2118               "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2119               "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2120               []>;
   2121 
   2122 def TEX_2D_F32_S32
        // 2D (tex.2d) fetches returning four f32 components in $r, $g, $b, $a.
        // The coordinate vector is {$x, $y}.
        //   _S32 / _F32 : coordinate register class
        //   _LEVEL      : tex.level form with an extra $lod operand
        //   _GRAD       : tex.grad form; each gradient is a two-element vector
        //                 ({$gradx0, $gradx1} and {$grady0, $grady1})
        // No selection patterns are attached.
   2123   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2124                     Float32Regs:$b, Float32Regs:$a),
   2125               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2126               "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2127               "[$t, $s, \\{$x, $y\\}];",
   2128               []>;
   2129 def TEX_2D_F32_F32
   2130   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2131                     Float32Regs:$b, Float32Regs:$a),
   2132               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2133               "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2134               "[$t, $s, \\{$x, $y\\}];",
   2135               []>;
   2136 def TEX_2D_F32_F32_LEVEL
   2137   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2138                     Float32Regs:$b, Float32Regs:$a),
   2139               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2140                    Float32Regs:$lod),
   2141               "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2142               "[$t, $s, \\{$x, $y\\}], $lod;",
   2143               []>;
   2144 def TEX_2D_F32_F32_GRAD
   2145   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2146                     Float32Regs:$b, Float32Regs:$a),
   2147               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2148                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2149                    Float32Regs:$grady0, Float32Regs:$grady1),
   2150               "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2151               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2152               "\\{$grady0, $grady1\\};",
   2153               []>;
   2154 def TEX_2D_S32_S32
        // 2D (tex.2d) fetches with the .s32 result type; the four results land
        // in Int32Regs. Coordinate vector is {$x, $y}.
        // Suffixes: _S32/_F32 = coordinate register class, _LEVEL adds $lod,
        // _GRAD adds the two-element gradient vectors. No selection patterns.
   2155   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2156                     Int32Regs:$b, Int32Regs:$a),
   2157               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2158               "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2159               "[$t, $s, \\{$x, $y\\}];",
   2160               []>;
   2161 def TEX_2D_S32_F32
   2162   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2163                     Int32Regs:$b, Int32Regs:$a),
   2164               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2165               "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2166               "[$t, $s, \\{$x, $y\\}];",
   2167               []>;
   2168 def TEX_2D_S32_F32_LEVEL
   2169   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2170                     Int32Regs:$b, Int32Regs:$a),
   2171               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2172                    Float32Regs:$lod),
   2173               "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2174               "[$t, $s, \\{$x, $y\\}], $lod;",
   2175               []>;
   2176 def TEX_2D_S32_F32_GRAD
   2177   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2178                     Int32Regs:$b, Int32Regs:$a),
   2179               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2180                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2181                    Float32Regs:$grady0, Float32Regs:$grady1),
   2182               "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2183               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2184               "\\{$grady0, $grady1\\};",
   2185               []>;
   2186 def TEX_2D_U32_S32
        // 2D (tex.2d) fetches with the .u32 result type; results use Int32Regs
        // as in the .s32 records. Coordinate vector is {$x, $y}.
        // Suffixes: _S32/_F32 = coordinate register class, _LEVEL adds $lod,
        // _GRAD adds the two-element gradient vectors. No selection patterns.
   2187   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2188                     Int32Regs:$b, Int32Regs:$a),
   2189               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   2190               "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2191               "[$t, $s, \\{$x, $y\\}];",
   2192               []>;
   2193 def TEX_2D_U32_F32
   2194   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2195                     Int32Regs:$b, Int32Regs:$a),
   2196               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2197               "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2198               "[$t, $s, \\{$x, $y\\}];",
   2199               []>;
   2200 def TEX_2D_U32_F32_LEVEL
   2201   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2202                     Int32Regs:$b, Int32Regs:$a),
   2203               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2204                    Float32Regs:$lod),
   2205               "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2206               "[$t, $s, \\{$x, $y\\}], $lod;",
   2207               []>;
   2208 def TEX_2D_U32_F32_GRAD
   2209   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2210                     Int32Regs:$b, Int32Regs:$a),
   2211               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2212                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2213                    Float32Regs:$grady0, Float32Regs:$grady1),
   2214               "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2215               "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2216               "\\{$grady0, $grady1\\};",
   2217               []>;
   2218 
   2219 def TEX_2D_ARRAY_F32_S32
        // 2D-array (tex.a2d) fetches returning four f32 components.
        // The coordinate vector is printed as {$l, $x, $y, $y}: $l is the extra
        // 32-bit integer operand (NOTE(review): presumably the array layer
        // index -- confirm) and $y is deliberately printed twice so that the
        // vector has four elements.
        // NOTE(review): the duplicate looks like padding for a four-element
        // operand vector required by the instruction -- confirm in the PTX ISA.
        // Suffixes: _S32/_F32 = class of $x/$y, _LEVEL adds $lod, _GRAD adds
        // two-element gradient vectors. No selection patterns are attached.
   2220   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2221                     Float32Regs:$b, Float32Regs:$a),
   2222               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2223                    Int32Regs:$y),
   2224               "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2225               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2226               []>;
   2227 def TEX_2D_ARRAY_F32_F32
   2228   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2229                     Float32Regs:$b, Float32Regs:$a),
   2230               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2231                    Float32Regs:$y),
   2232               "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2233               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2234               []>;
   2235 def TEX_2D_ARRAY_F32_F32_LEVEL
   2236   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2237                     Float32Regs:$b, Float32Regs:$a),
   2238               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2239                    Float32Regs:$y, Float32Regs:$lod),
   2240               "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2241               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2242               []>;
   2243 def TEX_2D_ARRAY_F32_F32_GRAD
   2244   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2245                     Float32Regs:$b, Float32Regs:$a),
   2246               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2247                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
   2248                    Float32Regs:$grady0, Float32Regs:$grady1),
   2249               "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2250               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2251               "\\{$grady0, $grady1\\};",
   2252               []>;
   2253 def TEX_2D_ARRAY_S32_S32
        // 2D-array (tex.a2d) fetches with the .s32 result type; the four
        // results land in Int32Regs. The coordinate vector is printed as
        // {$l, $x, $y, $y}, with $y repeated to make four elements.
        // Suffixes: _S32/_F32 = class of $x/$y, _LEVEL adds $lod, _GRAD adds
        // two-element gradient vectors. No selection patterns are attached.
   2254   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2255                     Int32Regs:$b, Int32Regs:$a),
   2256               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2257                    Int32Regs:$y),
   2258               "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2259               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2260               []>;
   2261 def TEX_2D_ARRAY_S32_F32
   2262   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2263                     Int32Regs:$b, Int32Regs:$a),
   2264               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2265                    Float32Regs:$y),
   2266               "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2267               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2268               []>;
   2269 def TEX_2D_ARRAY_S32_F32_LEVEL
   2270   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2271                     Int32Regs:$b, Int32Regs:$a),
   2272               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2273                    Float32Regs:$y, Float32Regs:$lod),
   2274               "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2275               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2276               []>;
   2277 def TEX_2D_ARRAY_S32_F32_GRAD
   2278   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2279                     Int32Regs:$b, Int32Regs:$a),
   2280               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2281                    Float32Regs:$y,
   2282                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2283                    Float32Regs:$grady0, Float32Regs:$grady1),
   2284               "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2285               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2286               "\\{$grady0, $grady1\\};",
   2287               []>;
   2288 def TEX_2D_ARRAY_U32_S32
        // 2D-array (tex.a2d) fetches with the .u32 result type; results use
        // Int32Regs as in the .s32 records. The coordinate vector is printed
        // as {$l, $x, $y, $y}, with $y repeated to make four elements.
        // Suffixes: _S32/_F32 = class of $x/$y, _LEVEL adds $lod, _GRAD adds
        // two-element gradient vectors. No selection patterns are attached.
   2289   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2290                     Int32Regs:$b, Int32Regs:$a),
   2291               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   2292                    Int32Regs:$y),
   2293               "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2294               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2295               []>;
   2296 def TEX_2D_ARRAY_U32_F32
   2297   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2298                     Int32Regs:$b, Int32Regs:$a),
   2299               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2300                    Float32Regs:$y),
   2301               "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2302               "[$t, $s, \\{$l, $x, $y, $y\\}];",
   2303               []>;
   2304 def TEX_2D_ARRAY_U32_F32_LEVEL
   2305   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2306                     Int32Regs:$b, Int32Regs:$a),
   2307               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2308                    Float32Regs:$y, Float32Regs:$lod),
   2309               "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2310               "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
   2311               []>;
   2312 def TEX_2D_ARRAY_U32_F32_GRAD
   2313   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2314                     Int32Regs:$b, Int32Regs:$a),
   2315               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
   2316                    Float32Regs:$y,
   2317                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2318                    Float32Regs:$grady0, Float32Regs:$grady1),
   2319               "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2320               "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2321               "\\{$grady0, $grady1\\};",
   2322               []>;
   2323 
   2324 def TEX_3D_F32_S32
   2325   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2326                     Float32Regs:$b, Float32Regs:$a),
   2327               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2328                    Int32Regs:$z),
   2329               "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2330               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2331               []>;
   2332 def TEX_3D_F32_F32
   2333   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2334                     Float32Regs:$b, Float32Regs:$a),
   2335               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2336                    Float32Regs:$z),
   2337               "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2338               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2339               []>;
   2340 def TEX_3D_F32_F32_LEVEL
        // Prints "tex.level.3d.v4.f32.f32": same operand layout as the plain 3d
        // f32/f32 form ({$x, $y, $z, $z}) plus a trailing Float32Regs $lod operand.
   2341   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2342                     Float32Regs:$b, Float32Regs:$a),
   2343               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2344                    Float32Regs:$z, Float32Regs:$lod),
   2345               "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2346               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2347               []>;
   2348 def TEX_3D_F32_F32_GRAD
        // Prints "tex.grad.3d.v4.f32.f32": four Float32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}. Each gradient vector is printed with four
        // elements, repeating its third input ($gradx2 / $grady2) in the last slot.
   2349   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2350                     Float32Regs:$b, Float32Regs:$a),
   2351               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2352                    Float32Regs:$z,
   2353                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2354                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2355                    Float32Regs:$grady1, Float32Regs:$grady2),
   2356               "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2357               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2358               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2359               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2360               []>;
   2361 def TEX_3D_S32_S32
        // Prints "tex.3d.v4.s32.s32": four Int32Regs results, Int32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2362   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2363                     Int32Regs:$b, Int32Regs:$a),
   2364               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2365                    Int32Regs:$z),
   2366               "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2367               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2368               []>;
   2369 def TEX_3D_S32_F32
        // Prints "tex.3d.v4.s32.f32": four Int32Regs results, Float32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2370   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2371                     Int32Regs:$b, Int32Regs:$a),
   2372               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2373                    Float32Regs:$z),
   2374               "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2375               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2376               []>;
   2377 def TEX_3D_S32_F32_LEVEL
        // Prints "tex.level.3d.v4.s32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}, plus a trailing Float32Regs $lod operand.
   2378   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2379                     Int32Regs:$b, Int32Regs:$a),
   2380               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2381                    Float32Regs:$z, Float32Regs:$lod),
   2382               "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2383               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2384               []>;
   2385 def TEX_3D_S32_F32_GRAD
        // Prints "tex.grad.3d.v4.s32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}. Each gradient vector is printed with four
        // elements, repeating its third input ($gradx2 / $grady2) in the last slot.
   2386   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2387                     Int32Regs:$b, Int32Regs:$a),
   2388               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2389                    Float32Regs:$z,
   2390                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2391                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2392                    Float32Regs:$grady1, Float32Regs:$grady2),
   2393               "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2394               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2395               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2396               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2397               []>;
   2398 def TEX_3D_U32_S32
        // Prints "tex.3d.v4.u32.s32": four Int32Regs results, Int32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2399   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2400                     Int32Regs:$b, Int32Regs:$a),
   2401               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   2402                    Int32Regs:$z),
   2403               "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2404               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2405               []>;
   2406 def TEX_3D_U32_F32
        // Prints "tex.3d.v4.u32.f32": four Int32Regs results, Float32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2407   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2408                     Int32Regs:$b, Int32Regs:$a),
   2409               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2410                    Float32Regs:$z),
   2411               "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2412               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2413               []>;
   2414 def TEX_3D_U32_F32_LEVEL
        // Prints "tex.level.3d.v4.u32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}, plus a trailing Float32Regs $lod operand.
   2415   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2416                     Int32Regs:$b, Int32Regs:$a),
   2417               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2418                    Float32Regs:$z, Float32Regs:$lod),
   2419               "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2420               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2421               []>;
   2422 def TEX_3D_U32_F32_GRAD
        // Prints "tex.grad.3d.v4.u32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}. Each gradient vector is printed with four
        // elements, repeating its third input ($gradx2 / $grady2) in the last slot.
   2423   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2424                     Int32Regs:$b, Int32Regs:$a),
   2425               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
   2426                    Float32Regs:$z,
   2427                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2428                    Float32Regs:$gradx2, Float32Regs:$grady0,
   2429                    Float32Regs:$grady1, Float32Regs:$grady2),
   2430               "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2431               "[$t, $s, \\{$x, $y, $z, $z\\}], "
   2432               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   2433               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   2434               []>;
   2435 
   2436 def TEX_CUBE_F32_F32
        // Prints "tex.cube.v4.f32.f32": four Float32Regs results, Float32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2437   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2438                     Float32Regs:$b, Float32Regs:$a),
   2439               (ins Int64Regs:$t, Int64Regs:$s,
   2440                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2441               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2442               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2443               []>;
   2444 def TEX_CUBE_F32_F32_LEVEL
        // Prints "tex.level.cube.v4.f32.f32": four Float32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}, plus a trailing Float32Regs $lod operand.
   2445   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2446                     Float32Regs:$b, Float32Regs:$a),
   2447               (ins Int64Regs:$t, Int64Regs:$s,
   2448                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2449                    Float32Regs:$lod),
   2450               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2451               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2452               []>;
   2453 def TEX_CUBE_S32_F32
        // Prints "tex.cube.v4.s32.f32": four Int32Regs results, Float32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2454   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2455                     Int32Regs:$b, Int32Regs:$a),
   2456               (ins Int64Regs:$t, Int64Regs:$s,
   2457                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2458               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2459               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2460               []>;
   2461 def TEX_CUBE_S32_F32_LEVEL
        // Prints "tex.level.cube.v4.s32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}, plus a trailing Float32Regs $lod operand.
   2462   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2463                     Int32Regs:$b, Int32Regs:$a),
   2464               (ins Int64Regs:$t, Int64Regs:$s,
   2465                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2466                    Float32Regs:$lod),
   2467               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2468               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2469               []>;
   2470 def TEX_CUBE_U32_F32
        // Prints "tex.cube.v4.u32.f32": four Int32Regs results, Float32Regs
        // coordinates. The coordinate vector is {$x, $y, $z, $z} ($z listed twice).
   2471   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2472                     Int32Regs:$b, Int32Regs:$a),
   2473               (ins Int64Regs:$t, Int64Regs:$s,
   2474                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2475               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2476               "[$t, $s, \\{$x, $y, $z, $z\\}];",
   2477               []>;
   2478 def TEX_CUBE_U32_F32_LEVEL
        // Prints "tex.level.cube.v4.u32.f32": four Int32Regs results, Float32Regs
        // coordinates {$x, $y, $z, $z}, plus a trailing Float32Regs $lod operand.
   2479   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2480                     Int32Regs:$b, Int32Regs:$a),
   2481               (ins Int64Regs:$t, Int64Regs:$s,
   2482                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2483                    Float32Regs:$lod),
   2484               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2485               "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
   2486               []>;
   2487 
   2488 def TEX_CUBE_ARRAY_F32_F32
        // Prints "tex.acube.v4.f32.f32": four Float32Regs results. The address
        // vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs $x, $y, $z.
   2489   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2490                     Float32Regs:$b, Float32Regs:$a),
   2491               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2492                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2493               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2494               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2495               []>;
   2496 def TEX_CUBE_ARRAY_F32_F32_LEVEL
        // Prints "tex.level.acube.v4.f32.f32": four Float32Regs results, address
        // vector {$l, $x, $y, $z}, plus a trailing Float32Regs $lod operand.
   2497   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2498                     Float32Regs:$b, Float32Regs:$a),
   2499               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2500                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2501                    Float32Regs:$lod),
   2502               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2503               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2504               []>;
   2505 def TEX_CUBE_ARRAY_S32_F32
        // Prints "tex.acube.v4.s32.f32": four Int32Regs results. The address
        // vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs $x, $y, $z.
   2506   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2507                     Int32Regs:$b, Int32Regs:$a),
   2508               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2509                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2510               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2511               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2512               []>;
   2513 def TEX_CUBE_ARRAY_S32_F32_LEVEL
        // Prints "tex.level.acube.v4.s32.f32": four Int32Regs results, address
        // vector {$l, $x, $y, $z}, plus a trailing Float32Regs $lod operand.
   2514   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2515                     Int32Regs:$b, Int32Regs:$a),
   2516               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2517                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2518                    Float32Regs:$lod),
   2519               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2520               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2521               []>;
   2522 def TEX_CUBE_ARRAY_U32_F32
        // Prints "tex.acube.v4.u32.f32": four Int32Regs results. The address
        // vector is {$l, $x, $y, $z}: Int32Regs $l, then Float32Regs $x, $y, $z.
   2523   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2524                     Int32Regs:$b, Int32Regs:$a),
   2525               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2526                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   2527               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2528               "[$t, $s, \\{$l, $x, $y, $z\\}];",
   2529               []>;
   2530 def TEX_CUBE_ARRAY_U32_F32_LEVEL
        // Prints "tex.level.acube.v4.u32.f32": four Int32Regs results, address
        // vector {$l, $x, $y, $z}, plus a trailing Float32Regs $lod operand.
   2531   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2532                     Int32Regs:$b, Int32Regs:$a),
   2533               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
   2534                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   2535                    Float32Regs:$lod),
   2536               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2537               "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
   2538               []>;
   2539 
   2540 def TLD4_R_2D_F32_F32
        // Prints "tld4.r.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2541   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2542                     Float32Regs:$v2, Float32Regs:$v3),
   2543               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2544               "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2545               "[$t, $s, \\{$x, $y\\}];",
   2546               []>;
   2547 def TLD4_G_2D_F32_F32
        // Prints "tld4.g.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2548   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2549                     Float32Regs:$v2, Float32Regs:$v3),
   2550               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2551               "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2552               "[$t, $s, \\{$x, $y\\}];",
   2553               []>;
   2554 def TLD4_B_2D_F32_F32
        // Prints "tld4.b.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2555   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2556                     Float32Regs:$v2, Float32Regs:$v3),
   2557               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2558               "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2559               "[$t, $s, \\{$x, $y\\}];",
   2560               []>;
   2561 def TLD4_A_2D_F32_F32
        // Prints "tld4.a.2d.v4.f32.f32": four Float32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2562   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   2563                     Float32Regs:$v2, Float32Regs:$v3),
   2564               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2565               "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2566               "[$t, $s, \\{$x, $y\\}];",
   2567               []>;
   2568 def TLD4_R_2D_S32_F32
        // Prints "tld4.r.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2569   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2570                     Int32Regs:$v2, Int32Regs:$v3),
   2571               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2572               "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2573               "[$t, $s, \\{$x, $y\\}];",
   2574               []>;
   2575 def TLD4_G_2D_S32_F32
        // Prints "tld4.g.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2576   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2577                     Int32Regs:$v2, Int32Regs:$v3),
   2578               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2579               "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2580               "[$t, $s, \\{$x, $y\\}];",
   2581               []>;
   2582 def TLD4_B_2D_S32_F32
        // Prints "tld4.b.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2583   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2584                     Int32Regs:$v2, Int32Regs:$v3),
   2585               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2586               "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2587               "[$t, $s, \\{$x, $y\\}];",
   2588               []>;
   2589 def TLD4_A_2D_S32_F32
        // Prints "tld4.a.2d.v4.s32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2590   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2591                     Int32Regs:$v2, Int32Regs:$v3),
   2592               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2593               "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2594               "[$t, $s, \\{$x, $y\\}];",
   2595               []>;
   2596 def TLD4_R_2D_U32_F32
        // Prints "tld4.r.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2597   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2598                     Int32Regs:$v2, Int32Regs:$v3),
   2599               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2600               "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2601               "[$t, $s, \\{$x, $y\\}];",
   2602               []>;
   2603 def TLD4_G_2D_U32_F32
        // Prints "tld4.g.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2604   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2605                     Int32Regs:$v2, Int32Regs:$v3),
   2606               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2607               "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2608               "[$t, $s, \\{$x, $y\\}];",
   2609               []>;
   2610 def TLD4_B_2D_U32_F32
        // Prints "tld4.b.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2611   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2612                     Int32Regs:$v2, Int32Regs:$v3),
   2613               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2614               "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2615               "[$t, $s, \\{$x, $y\\}];",
   2616               []>;
   2617 def TLD4_A_2D_U32_F32
        // Prints "tld4.a.2d.v4.u32.f32": four Int32Regs results ($v0..$v3) from
        // [$t, $s, {$x, $y}] with Float32Regs coordinates. Pattern list is empty.
   2618   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   2619                     Int32Regs:$v2, Int32Regs:$v3),
   2620               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
   2621               "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   2622               "[$t, $s, \\{$x, $y\\}];",
   2623               []>;
   2624 }
   2625 
   2626 
   2627 // texmode_unified
   2628 let IsTex = 1, IsTexModeUnified = 1 in {
   2629 // Texture fetch instructions using handles
   2630 def TEX_UNIFIED_1D_F32_S32
        // Prints "tex.1d.v4.f32.s32": four Float32Regs results from [$t, {$x}] with
        // an Int32Regs coordinate. Only $t is taken; there is no $s operand.
   2631   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2632                     Float32Regs:$b, Float32Regs:$a),
   2633               (ins Int64Regs:$t, Int32Regs:$x),
   2634               "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2635               []>;
   2636 def TEX_UNIFIED_1D_F32_F32
        // Prints "tex.1d.v4.f32.f32": four Float32Regs results from [$t, {$x}] with
        // a Float32Regs coordinate. Only $t is taken; there is no $s operand.
   2637   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2638                     Float32Regs:$b, Float32Regs:$a),
   2639               (ins Int64Regs:$t, Float32Regs:$x),
   2640               "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2641               []>;
   2642 def TEX_UNIFIED_1D_F32_F32_LEVEL
        // Prints "tex.level.1d.v4.f32.f32": four Float32Regs results from
        // [$t, {$x}] followed by a Float32Regs $lod operand. No $s operand.
   2643   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2644                     Float32Regs:$b, Float32Regs:$a),
   2645               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
   2646               "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2647               "[$t, \\{$x\\}], $lod;",
   2648               []>;
   2649 def TEX_UNIFIED_1D_F32_F32_GRAD
        // Prints "tex.grad.1d.v4.f32.f32": four Float32Regs results from
        // [$t, {$x}] followed by single-element {$gradx} and {$grady} vectors.
   2650   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2651                     Float32Regs:$b, Float32Regs:$a),
   2652               (ins Int64Regs:$t, Float32Regs:$x,
   2653                    Float32Regs:$gradx, Float32Regs:$grady),
   2654               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2655               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2656               []>;
   2657 def TEX_UNIFIED_1D_S32_S32
        // Prints "tex.1d.v4.s32.s32": four Int32Regs results from [$t, {$x}] with
        // an Int32Regs coordinate. Only $t is taken; there is no $s operand.
   2658   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2659                     Int32Regs:$b, Int32Regs:$a),
   2660               (ins Int64Regs:$t, Int32Regs:$x),
   2661               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2662               []>;
   2663 def TEX_UNIFIED_1D_S32_F32
        // Prints "tex.1d.v4.s32.f32": four Int32Regs results from [$t, {$x}] with
        // a Float32Regs coordinate. Only $t is taken; there is no $s operand.
   2664   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2665                     Int32Regs:$b, Int32Regs:$a),
   2666               (ins Int64Regs:$t, Float32Regs:$x),
   2667               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2668               []>;
   2669 def TEX_UNIFIED_1D_S32_F32_LEVEL
        // Prints "tex.level.1d.v4.s32.f32": four Int32Regs results from
        // [$t, {$x}] followed by a Float32Regs $lod operand. No $s operand.
   2670   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2671                     Int32Regs:$b, Int32Regs:$a),
   2672               (ins Int64Regs:$t, Float32Regs:$x,
   2673                    Float32Regs:$lod),
   2674               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2675               "[$t, \\{$x\\}], $lod;",
   2676               []>;
   2677 def TEX_UNIFIED_1D_S32_F32_GRAD
        // Prints "tex.grad.1d.v4.s32.f32": four Int32Regs results from
        // [$t, {$x}] followed by single-element {$gradx} and {$grady} vectors.
   2678   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2679                     Int32Regs:$b, Int32Regs:$a),
   2680               (ins Int64Regs:$t, Float32Regs:$x,
   2681                    Float32Regs:$gradx, Float32Regs:$grady),
   2682               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2683               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2684               []>;
   2685 def TEX_UNIFIED_1D_U32_S32
        // Prints "tex.1d.v4.u32.s32": four Int32Regs results from [$t, {$x}] with
        // an Int32Regs coordinate. Only $t is taken; there is no $s operand.
   2686   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2687                     Int32Regs:$b, Int32Regs:$a),
   2688               (ins Int64Regs:$t, Int32Regs:$x),
   2689               "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2690               []>;
   2691 def TEX_UNIFIED_1D_U32_F32
        // Prints "tex.1d.v4.u32.f32": four Int32Regs results from [$t, {$x}] with
        // a Float32Regs coordinate. Only $t is taken; there is no $s operand.
   2692   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2693                     Int32Regs:$b, Int32Regs:$a),
   2694               (ins Int64Regs:$t, Float32Regs:$x),
   2695               "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
   2696               []>;
   2697 def TEX_UNIFIED_1D_U32_F32_LEVEL
        // Prints "tex.level.1d.v4.u32.f32": four Int32Regs results from
        // [$t, {$x}] followed by a Float32Regs $lod operand. No $s operand.
   2698   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2699                     Int32Regs:$b, Int32Regs:$a),
   2700               (ins Int64Regs:$t, Float32Regs:$x,
   2701                    Float32Regs:$lod),
   2702               "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2703               "[$t, \\{$x\\}], $lod;",
   2704               []>;
   2705 def TEX_UNIFIED_1D_U32_F32_GRAD
        // Prints "tex.grad.1d.v4.u32.f32": four Int32Regs results from
        // [$t, {$x}] followed by single-element {$gradx} and {$grady} vectors.
   2706   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2707                     Int32Regs:$b, Int32Regs:$a),
   2708               (ins Int64Regs:$t, Float32Regs:$x,
   2709                    Float32Regs:$gradx, Float32Regs:$grady),
   2710               "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2711               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
   2712               []>;
   2713 
   2714 def TEX_UNIFIED_1D_ARRAY_F32_S32
        // Prints "tex.a1d.v4.f32.s32": four Float32Regs results from
        // [$t, {$l, $x}], where $l and $x are both Int32Regs. No $s operand.
   2715   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2716                     Float32Regs:$b, Float32Regs:$a),
   2717               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
   2718               "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2719               "[$t, \\{$l, $x\\}];",
   2720               []>;
   2721 def TEX_UNIFIED_1D_ARRAY_F32_F32
        // Prints "tex.a1d.v4.f32.f32": four Float32Regs results from
        // [$t, {$l, $x}], with Int32Regs $l and Float32Regs $x. No $s operand.
   2722   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2723                     Float32Regs:$b, Float32Regs:$a),
   2724               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
   2725               "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2726               "[$t, \\{$l, $x\\}];",
   2727               []>;
   2728 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
        // Prints "tex.level.a1d.v4.f32.f32": four Float32Regs results from
        // [$t, {$l, $x}] followed by a Float32Regs $lod operand. No $s operand.
   2729   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2730                     Float32Regs:$b, Float32Regs:$a),
   2731               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2732                    Float32Regs:$lod),
   2733               "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2734               "[$t, \\{$l, $x\\}], $lod;",
   2735               []>;
   2736 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
        // Prints "tex.grad.a1d.v4.f32.f32": four Float32Regs results from
        // [$t, {$l, $x}] followed by single-element {$gradx} and {$grady} vectors.
   2737   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2738                     Float32Regs:$b, Float32Regs:$a),
   2739               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2740                    Float32Regs:$gradx, Float32Regs:$grady),
   2741               "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2742               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2743               []>;
   2744 def TEX_UNIFIED_1D_ARRAY_S32_S32
        // Prints "tex.a1d.v4.s32.s32": four Int32Regs results from
        // [$t, {$l, $x}], where $l and $x are both Int32Regs. No $s operand.
   2745   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2746                     Int32Regs:$b, Int32Regs:$a),
   2747               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
   2748               "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2749               "[$t, \\{$l, $x\\}];",
   2750               []>;
   2751 def TEX_UNIFIED_1D_ARRAY_S32_F32
        // Prints "tex.a1d.v4.s32.f32": four Int32Regs results from
        // [$t, {$l, $x}], with Int32Regs $l and Float32Regs $x. No $s operand.
   2752   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2753                     Int32Regs:$b, Int32Regs:$a),
   2754               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
   2755               "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2756               "[$t, \\{$l, $x\\}];",
   2757               []>;
   2758 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
        // Prints "tex.level.a1d.v4.s32.f32": four Int32Regs results from
        // [$t, {$l, $x}] followed by a Float32Regs $lod operand. No $s operand.
   2759   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2760                     Int32Regs:$b, Int32Regs:$a),
   2761               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2762                    Float32Regs:$lod),
   2763               "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2764               "[$t, \\{$l, $x\\}], $lod;",
   2765               []>;
   2766 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
        // Prints "tex.grad.a1d.v4.s32.f32": four Int32Regs results from
        // [$t, {$l, $x}] followed by single-element {$gradx} and {$grady} vectors.
   2767   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2768                     Int32Regs:$b, Int32Regs:$a),
   2769               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2770                    Float32Regs:$gradx, Float32Regs:$grady),
   2771               "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2772               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2773               []>;
   2774 def TEX_UNIFIED_1D_ARRAY_U32_S32
        // Prints "tex.a1d.v4.u32.s32": four Int32Regs results from
        // [$t, {$l, $x}], where $l and $x are both Int32Regs. No $s operand.
   2775   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2776                     Int32Regs:$b, Int32Regs:$a),
   2777               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
   2778               "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2779               "[$t, \\{$l, $x\\}];",
   2780               []>;
   2781 def TEX_UNIFIED_1D_ARRAY_U32_F32
        // Prints "tex.a1d.v4.u32.f32": four Int32Regs results from
        // [$t, {$l, $x}], with Int32Regs $l and Float32Regs $x. No $s operand.
   2782   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2783                     Int32Regs:$b, Int32Regs:$a),
   2784               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
   2785               "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2786               "[$t, \\{$l, $x\\}];",
   2787               []>;
   2788 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
        // Prints "tex.level.a1d.v4.u32.f32": four Int32Regs results from
        // [$t, {$l, $x}] followed by a Float32Regs $lod operand. No $s operand.
   2789   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2790                     Int32Regs:$b, Int32Regs:$a),
   2791               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2792                    Float32Regs:$lod),
   2793               "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2794               "[$t, \\{$l, $x\\}], $lod;",
   2795               []>;
   2796 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
        // Prints "tex.grad.a1d.v4.u32.f32": four Int32Regs results from
        // [$t, {$l, $x}] followed by single-element {$gradx} and {$grady} vectors.
   2797   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2798                     Int32Regs:$b, Int32Regs:$a),
   2799               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2800                    Float32Regs:$gradx, Float32Regs:$grady),
   2801               "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2802               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
   2803               []>;
   2804 
   2805 def TEX_UNIFIED_2D_F32_S32
        // Prints "tex.2d.v4.f32.s32": four Float32Regs results from
        // [$t, {$x, $y}] with Int32Regs coordinates. No $s operand.
   2806   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2807                     Float32Regs:$b, Float32Regs:$a),
   2808               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   2809               "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2810               "[$t, \\{$x, $y\\}];",
   2811               []>;
   2812 def TEX_UNIFIED_2D_F32_F32
        // Prints "tex.2d.v4.f32.f32": four Float32Regs results from
        // [$t, {$x, $y}] with Float32Regs coordinates. No $s operand.
   2813   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2814                     Float32Regs:$b, Float32Regs:$a),
   2815               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   2816               "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2817               "[$t, \\{$x, $y\\}];",
   2818               []>;
   2819 def TEX_UNIFIED_2D_F32_F32_LEVEL
        // Prints "tex.level.2d.v4.f32.f32": four Float32Regs results from
        // [$t, {$x, $y}] followed by a Float32Regs $lod operand. No $s operand.
   2820   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2821                     Float32Regs:$b, Float32Regs:$a),
   2822               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2823                    Float32Regs:$lod),
   2824               "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2825               "[$t, \\{$x, $y\\}], $lod;",
   2826               []>;
   2827 def TEX_UNIFIED_2D_F32_F32_GRAD
        // Prints "tex.grad.2d.v4.f32.f32": four Float32Regs results from
        // [$t, {$x, $y}] followed by the two-element {$gradx0, $gradx1} and
        // {$grady0, $grady1} vectors. No $s operand.
   2828   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2829                     Float32Regs:$b, Float32Regs:$a),
   2830               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2831                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2832                    Float32Regs:$grady0, Float32Regs:$grady1),
   2833               "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2834               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2835               "\\{$grady0, $grady1\\};",
   2836               []>;
   2837 def TEX_UNIFIED_2D_S32_S32
        // Prints "tex.2d.v4.s32.s32": four Int32Regs results from
        // [$t, {$x, $y}] with Int32Regs coordinates. No $s operand.
   2838   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2839                     Int32Regs:$b, Int32Regs:$a),
   2840               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   2841               "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2842               "[$t, \\{$x, $y\\}];",
   2843               []>;
   2844 def TEX_UNIFIED_2D_S32_F32
        // Prints "tex.2d.v4.s32.f32": four Int32Regs results from
        // [$t, {$x, $y}] with Float32Regs coordinates. No $s operand.
   2845   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2846                     Int32Regs:$b, Int32Regs:$a),
   2847               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   2848               "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2849               "[$t, \\{$x, $y\\}];",
   2850               []>;
   2851 def TEX_UNIFIED_2D_S32_F32_LEVEL
        // Prints "tex.level.2d.v4.s32.f32": four Int32Regs results from
        // [$t, {$x, $y}] followed by a Float32Regs $lod operand. No $s operand.
   2852   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2853                     Int32Regs:$b, Int32Regs:$a),
   2854               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2855                    Float32Regs:$lod),
   2856               "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2857               "[$t, \\{$x, $y\\}], $lod;",
   2858               []>;
   2859 def TEX_UNIFIED_2D_S32_F32_GRAD
        // Prints "tex.grad.2d.v4.s32.f32": four Int32Regs results from
        // [$t, {$x, $y}] followed by the two-element {$gradx0, $gradx1} and
        // {$grady0, $grady1} vectors. No $s operand.
   2860   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2861                     Int32Regs:$b, Int32Regs:$a),
   2862               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2863                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2864                    Float32Regs:$grady0, Float32Regs:$grady1),
   2865               "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2866               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2867               "\\{$grady0, $grady1\\};",
   2868               []>;
   2869 def TEX_UNIFIED_2D_U32_S32
        // Prints "tex.2d.v4.u32.s32": four Int32Regs results from
        // [$t, {$x, $y}] with Int32Regs coordinates. No $s operand.
   2870   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2871                     Int32Regs:$b, Int32Regs:$a),
   2872               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
   2873               "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2874               "[$t, \\{$x, $y\\}];",
   2875               []>;
   2876 def TEX_UNIFIED_2D_U32_F32
        // Prints "tex.2d.v4.u32.f32": four Int32Regs results from
        // [$t, {$x, $y}] with Float32Regs coordinates. No $s operand.
   2877   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2878                     Int32Regs:$b, Int32Regs:$a),
   2879               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   2880               "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2881               "[$t, \\{$x, $y\\}];",
   2882               []>;
   2883 def TEX_UNIFIED_2D_U32_F32_LEVEL
        // Prints "tex.level.2d.v4.u32.f32": four Int32Regs results from
        // [$t, {$x, $y}] followed by a Float32Regs $lod operand. No $s operand.
   2884   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2885                     Int32Regs:$b, Int32Regs:$a),
   2886               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2887                    Float32Regs:$lod),
   2888               "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2889               "[$t, \\{$x, $y\\}], $lod;",
   2890               []>;
   2891 def TEX_UNIFIED_2D_U32_F32_GRAD
        // Prints "tex.grad.2d.v4.u32.f32": four Int32Regs results from
        // [$t, {$x, $y}] followed by the two-element {$gradx0, $gradx1} and
        // {$grady0, $grady1} vectors. No $s operand.
   2892   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2893                     Int32Regs:$b, Int32Regs:$a),
   2894               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   2895                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2896                    Float32Regs:$grady0, Float32Regs:$grady1),
   2897               "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2898               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
   2899               "\\{$grady0, $grady1\\};",
   2900               []>;
   2901 
   2902 def TEX_UNIFIED_2D_ARRAY_F32_S32
        // Prints "tex.a2d.v4.f32.s32": four Float32Regs results, all-Int32Regs
        // address vector {$l, $x, $y, $y} ($y listed twice). No $s operand.
   2903   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2904                     Float32Regs:$b, Float32Regs:$a),
   2905               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
   2906                    Int32Regs:$y),
   2907               "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   2908               "[$t, \\{$l, $x, $y, $y\\}];",
   2909               []>;
   2910 def TEX_UNIFIED_2D_ARRAY_F32_F32
        // Prints "tex.a2d.v4.f32.f32": four Float32Regs results. Address vector is
        // {$l, $x, $y, $y}: Int32Regs $l, Float32Regs $x/$y, with $y listed twice.
   2911   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2912                     Float32Regs:$b, Float32Regs:$a),
   2913               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2914                    Float32Regs:$y),
   2915               "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2916               "[$t, \\{$l, $x, $y, $y\\}];",
   2917               []>;
   2918 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
        // Prints "tex.level.a2d.v4.f32.f32": four Float32Regs results, address
        // vector {$l, $x, $y, $y}, plus a trailing Float32Regs $lod operand.
   2919   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2920                     Float32Regs:$b, Float32Regs:$a),
   2921               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2922                    Float32Regs:$y, Float32Regs:$lod),
   2923               "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2924               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
   2925               []>;
   2926 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
        // Prints "tex.grad.a2d.v4.f32.f32": four Float32Regs results, address
        // vector {$l, $x, $y, $y}, followed by the two-element {$gradx0, $gradx1}
        // and {$grady0, $grady1} vectors. No $s operand.
   2927   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   2928                     Float32Regs:$b, Float32Regs:$a),
   2929               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2930                    Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
   2931                    Float32Regs:$grady0, Float32Regs:$grady1),
   2932               "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   2933               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2934               "\\{$grady0, $grady1\\};",
   2935               []>;
   2936 def TEX_UNIFIED_2D_ARRAY_S32_S32
          // Prints PTX "tex.a2d.v4.s32.s32": four Int32Regs results from Int64Regs
          // $t, Int32Regs $l and Int32Regs $x/$y. $y is printed twice to give the
          // brace vector four entries. No selection pattern ([]) is attached.
   2937   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2938                     Int32Regs:$b, Int32Regs:$a),
   2939               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
   2940                    Int32Regs:$y),
   2941               "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   2942               "[$t, \\{$l, $x, $y, $y\\}];",
   2943               []>;
   2944 def TEX_UNIFIED_2D_ARRAY_S32_F32
          // Prints PTX "tex.a2d.v4.s32.f32": four Int32Regs results from Int64Regs
          // $t, Int32Regs $l and Float32Regs $x/$y. $y is printed twice to give the
          // brace vector four entries. No selection pattern ([]) is attached.
   2945   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2946                     Int32Regs:$b, Int32Regs:$a),
   2947               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2948                    Float32Regs:$y),
   2949               "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2950               "[$t, \\{$l, $x, $y, $y\\}];",
   2951               []>;
   2952 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
          // Prints PTX "tex.level.a2d.v4.s32.f32": four Int32Regs results; operands
          // are $t, $l, Float32Regs $x/$y, and Float32Regs $lod printed after the
          // bracketed address. $y is printed twice; no pattern ([]) is attached.
   2953   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2954                     Int32Regs:$b, Int32Regs:$a),
   2955               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2956                    Float32Regs:$y, Float32Regs:$lod),
   2957               "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2958               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
   2959               []>;
   2960 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
          // Prints PTX "tex.grad.a2d.v4.s32.f32": four Int32Regs results; after the
          // bracketed address ($t, {$l, $x, $y, $y}) it prints {$gradx0, $gradx1}
          // and {$grady0, $grady1}, all Float32Regs. No pattern ([]) is attached.
   2961   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2962                     Int32Regs:$b, Int32Regs:$a),
   2963               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2964                    Float32Regs:$y,
   2965                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   2966                    Float32Regs:$grady0, Float32Regs:$grady1),
   2967               "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   2968               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   2969               "\\{$grady0, $grady1\\};",
   2970               []>;
   2971 def TEX_UNIFIED_2D_ARRAY_U32_S32
          // Prints PTX "tex.a2d.v4.u32.s32": four Int32Regs results (the u32 form
          // uses the same register class as s32) from $t, $l and Int32Regs $x/$y.
          // $y is printed twice; no selection pattern ([]) is attached.
   2972   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2973                     Int32Regs:$b, Int32Regs:$a),
   2974               (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
   2975                    Int32Regs:$y),
   2976               "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   2977               "[$t, \\{$l, $x, $y, $y\\}];",
   2978               []>;
   2979 def TEX_UNIFIED_2D_ARRAY_U32_F32
          // Prints PTX "tex.a2d.v4.u32.f32": four Int32Regs results from Int64Regs
          // $t, Int32Regs $l and Float32Regs $x/$y. $y is printed twice to give the
          // brace vector four entries. No selection pattern ([]) is attached.
   2980   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2981                     Int32Regs:$b, Int32Regs:$a),
   2982               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2983                    Float32Regs:$y),
   2984               "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2985               "[$t, \\{$l, $x, $y, $y\\}];",
   2986               []>;
   2987 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
          // Prints PTX "tex.level.a2d.v4.u32.f32": four Int32Regs results; operands
          // are $t, $l, Float32Regs $x/$y, and Float32Regs $lod printed after the
          // bracketed address. $y is printed twice; no pattern ([]) is attached.
   2988   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2989                     Int32Regs:$b, Int32Regs:$a),
   2990               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2991                    Float32Regs:$y, Float32Regs:$lod),
   2992               "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   2993               "[$t, \\{$l, $x, $y, $y\\}], $lod;",
   2994               []>;
   2995 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
          // Prints PTX "tex.grad.a2d.v4.u32.f32": four Int32Regs results; after the
          // bracketed address ($t, {$l, $x, $y, $y}) it prints {$gradx0, $gradx1}
          // and {$grady0, $grady1}, all Float32Regs. No pattern ([]) is attached.
   2996   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   2997                     Int32Regs:$b, Int32Regs:$a),
   2998               (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
   2999                    Float32Regs:$y,
   3000                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3001                    Float32Regs:$grady0, Float32Regs:$grady1),
   3002               "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3003               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
   3004               "\\{$grady0, $grady1\\};",
   3005               []>;
   3006 
   3007 def TEX_UNIFIED_3D_F32_S32
          // Prints PTX "tex.3d.v4.f32.s32": four Float32Regs results from Int64Regs
          // $t and Int32Regs $x/$y/$z. $z is printed twice so the brace vector has
          // four entries (looks like padding -- confirm against the PTX ISA). The
          // empty list means no selection pattern is attached here.
   3008   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3009                     Float32Regs:$b, Float32Regs:$a),
   3010               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3011                    Int32Regs:$z),
   3012               "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
   3013               "[$t, \\{$x, $y, $z, $z\\}];",
   3014               []>;
   3015 def TEX_UNIFIED_3D_F32_F32
          // Prints PTX "tex.3d.v4.f32.f32": four Float32Regs results from Int64Regs
          // $t and Float32Regs $x/$y/$z. $z is printed twice to give the brace
          // vector four entries. No selection pattern ([]) is attached.
   3016   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3017                     Float32Regs:$b, Float32Regs:$a),
   3018               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3019                    Float32Regs:$z),
   3020               "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3021               "[$t, \\{$x, $y, $z, $z\\}];",
   3022               []>;
   3023 def TEX_UNIFIED_3D_F32_F32_LEVEL
          // Prints PTX "tex.level.3d.v4.f32.f32": same operands as the plain 3d
          // f32/f32 form plus Float32Regs $lod, printed after the bracketed address.
          // $z is printed twice; no selection pattern ([]) is attached.
   3024   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3025                     Float32Regs:$b, Float32Regs:$a),
   3026               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3027                    Float32Regs:$z, Float32Regs:$lod),
   3028               "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3029               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3030               []>;
   3031 def TEX_UNIFIED_3D_F32_F32_GRAD
          // Prints PTX "tex.grad.3d.v4.f32.f32": four Float32Regs results. Takes
          // three gradx and three grady Float32Regs operands; each group is printed
          // as a 4-entry brace vector with its last operand ($gradx2 / $grady2)
          // repeated, mirroring the repeated $z in the address. No pattern ([]).
   3032   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3033                     Float32Regs:$b, Float32Regs:$a),
   3034               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3035                    Float32Regs:$z,
   3036                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3037                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3038                    Float32Regs:$grady1, Float32Regs:$grady2),
   3039               "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3040               "[$t, \\{$x, $y, $z, $z\\}], "
   3041               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   3042               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   3043               []>;
   3044 def TEX_UNIFIED_3D_S32_S32
          // Prints PTX "tex.3d.v4.s32.s32": four Int32Regs results from Int64Regs $t
          // and Int32Regs $x/$y/$z. $z is printed twice to give the brace vector
          // four entries. No selection pattern ([]) is attached.
   3045   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3046                     Int32Regs:$b, Int32Regs:$a),
   3047               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3048                    Int32Regs:$z),
   3049               "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
   3050               "[$t, \\{$x, $y, $z, $z\\}];",
   3051               []>;
   3052 def TEX_UNIFIED_3D_S32_F32
          // Prints PTX "tex.3d.v4.s32.f32": four Int32Regs results from Int64Regs $t
          // and Float32Regs $x/$y/$z. $z is printed twice to give the brace vector
          // four entries. No selection pattern ([]) is attached.
   3053   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3054                     Int32Regs:$b, Int32Regs:$a),
   3055               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3056                    Float32Regs:$z),
   3057               "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3058               "[$t, \\{$x, $y, $z, $z\\}];",
   3059               []>;
   3060 def TEX_UNIFIED_3D_S32_F32_LEVEL
          // Prints PTX "tex.level.3d.v4.s32.f32": four Int32Regs results; operands
          // are $t, Float32Regs $x/$y/$z, and Float32Regs $lod printed after the
          // bracketed address. $z is printed twice; no pattern ([]) is attached.
   3061   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3062                     Int32Regs:$b, Int32Regs:$a),
   3063               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3064                    Float32Regs:$z, Float32Regs:$lod),
   3065               "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3066               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3067               []>;
   3068 def TEX_UNIFIED_3D_S32_F32_GRAD
          // Prints PTX "tex.grad.3d.v4.s32.f32": four Int32Regs results. The three
          // gradx and three grady Float32Regs operands are each printed as a 4-entry
          // brace vector with the last operand ($gradx2 / $grady2) repeated, as $z
          // is in the address. No selection pattern ([]) is attached.
   3069   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3070                     Int32Regs:$b, Int32Regs:$a),
   3071               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3072                    Float32Regs:$z,
   3073                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3074                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3075                    Float32Regs:$grady1, Float32Regs:$grady2),
   3076               "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3077               "[$t, \\{$x, $y, $z, $z\\}], "
   3078               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   3079               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   3080               []>;
   3081 def TEX_UNIFIED_3D_U32_S32
          // Prints PTX "tex.3d.v4.u32.s32": four Int32Regs results (the u32 form
          // uses the same register class as s32) from $t and Int32Regs $x/$y/$z.
          // $z is printed twice; no selection pattern ([]) is attached.
   3082   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3083                     Int32Regs:$b, Int32Regs:$a),
   3084               (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
   3085                    Int32Regs:$z),
   3086               "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
   3087               "[$t, \\{$x, $y, $z, $z\\}];",
   3088               []>;
   3089 def TEX_UNIFIED_3D_U32_F32
          // Prints PTX "tex.3d.v4.u32.f32": four Int32Regs results from Int64Regs $t
          // and Float32Regs $x/$y/$z. $z is printed twice to give the brace vector
          // four entries. No selection pattern ([]) is attached.
   3090   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3091                     Int32Regs:$b, Int32Regs:$a),
   3092               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3093                    Float32Regs:$z),
   3094               "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3095               "[$t, \\{$x, $y, $z, $z\\}];",
   3096               []>;
   3097 def TEX_UNIFIED_3D_U32_F32_LEVEL
          // Prints PTX "tex.level.3d.v4.u32.f32": four Int32Regs results; operands
          // are $t, Float32Regs $x/$y/$z, and Float32Regs $lod printed after the
          // bracketed address. $z is printed twice; no pattern ([]) is attached.
   3098   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3099                     Int32Regs:$b, Int32Regs:$a),
   3100               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3101                    Float32Regs:$z, Float32Regs:$lod),
   3102               "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3103               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3104               []>;
   3105 def TEX_UNIFIED_3D_U32_F32_GRAD
          // Prints PTX "tex.grad.3d.v4.u32.f32": four Int32Regs results. The three
          // gradx and three grady Float32Regs operands are each printed as a 4-entry
          // brace vector with the last operand ($gradx2 / $grady2) repeated, as $z
          // is in the address. No selection pattern ([]) is attached.
   3106   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3107                     Int32Regs:$b, Int32Regs:$a),
   3108               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
   3109                    Float32Regs:$z,
   3110                    Float32Regs:$gradx0, Float32Regs:$gradx1,
   3111                    Float32Regs:$gradx2, Float32Regs:$grady0,
   3112                    Float32Regs:$grady1, Float32Regs:$grady2),
   3113               "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3114               "[$t, \\{$x, $y, $z, $z\\}], "
   3115               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
   3116               "\\{$grady0, $grady1, $grady2, $grady2\\};",
   3117               []>;
   3118 
   3119 def TEX_UNIFIED_CUBE_F32_F32
          // Prints PTX "tex.cube.v4.f32.f32": four Float32Regs results from Int64Regs
          // $t and Float32Regs $x/$y/$z. $z is printed twice so the brace vector has
          // four entries (looks like padding -- confirm against the PTX ISA). The
          // empty list means no selection pattern is attached here.
   3120   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3121                     Float32Regs:$b, Float32Regs:$a),
   3122               (ins Int64Regs:$t,
   3123                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3124               "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3125               "[$t, \\{$x, $y, $z, $z\\}];",
   3126               []>;
   3127 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
          // Prints PTX "tex.level.cube.v4.f32.f32": four Float32Regs results from $t,
          // Float32Regs $x/$y/$z, and Float32Regs $lod printed after the bracketed
          // address. $z is printed twice; no selection pattern ([]) is attached.
   3128   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3129                     Float32Regs:$b, Float32Regs:$a),
   3130               (ins Int64Regs:$t,
   3131                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3132                    Float32Regs:$lod),
   3133               "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3134               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3135               []>;
   3136 def TEX_UNIFIED_CUBE_S32_F32
          // Prints PTX "tex.cube.v4.s32.f32": four Int32Regs results from Int64Regs
          // $t and Float32Regs $x/$y/$z. $z is printed twice to give the brace
          // vector four entries. No selection pattern ([]) is attached.
   3137   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3138                     Int32Regs:$b, Int32Regs:$a),
   3139               (ins Int64Regs:$t,
   3140                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3141               "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3142               "[$t, \\{$x, $y, $z, $z\\}];",
   3143               []>;
   3144 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
          // Prints PTX "tex.level.cube.v4.s32.f32": four Int32Regs results from $t,
          // Float32Regs $x/$y/$z, and Float32Regs $lod printed after the bracketed
          // address. $z is printed twice; no selection pattern ([]) is attached.
   3145   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3146                     Int32Regs:$b, Int32Regs:$a),
   3147               (ins Int64Regs:$t,
   3148                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3149                    Float32Regs:$lod),
   3150               "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3151               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3152               []>;
   3153 def TEX_UNIFIED_CUBE_U32_F32
          // Prints PTX "tex.cube.v4.u32.f32": four Int32Regs results from Int64Regs
          // $t and Float32Regs $x/$y/$z. $z is printed twice to give the brace
          // vector four entries. No selection pattern ([]) is attached.
   3154   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3155                     Int32Regs:$b, Int32Regs:$a),
   3156               (ins Int64Regs:$t,
   3157                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3158               "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3159               "[$t, \\{$x, $y, $z, $z\\}];",
   3160               []>;
   3161 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
          // Prints PTX "tex.level.cube.v4.u32.f32": four Int32Regs results from $t,
          // Float32Regs $x/$y/$z, and Float32Regs $lod printed after the bracketed
          // address. $z is printed twice; no selection pattern ([]) is attached.
   3162   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3163                     Int32Regs:$b, Int32Regs:$a),
   3164               (ins Int64Regs:$t,
   3165                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3166                    Float32Regs:$lod),
   3167               "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3168               "[$t, \\{$x, $y, $z, $z\\}], $lod;",
   3169               []>;
   3170 
   3171 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
          // Prints PTX "tex.acube.v4.f32.f32": four Float32Regs results from
          // Int64Regs $t, Int32Regs $l and Float32Regs $x/$y/$z. The four operands
          // $l, $x, $y, $z already fill the brace vector, so nothing is repeated.
          // The empty list means no selection pattern is attached here.
   3172   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3173                     Float32Regs:$b, Float32Regs:$a),
   3174               (ins Int64Regs:$t, Int32Regs:$l,
   3175                Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3176               "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3177               "[$t, \\{$l, $x, $y, $z\\}];",
   3178               []>;
   3179 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
          // Prints PTX "tex.level.acube.v4.f32.f32": four Float32Regs results from
          // $t, Int32Regs $l, Float32Regs $x/$y/$z, plus Float32Regs $lod printed
          // after the bracketed address. No selection pattern ([]) is attached.
   3180   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
   3181                     Float32Regs:$b, Float32Regs:$a),
   3182               (ins Int64Regs:$t, Int32Regs:$l,
   3183                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3184                    Float32Regs:$lod),
   3185               "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
   3186               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
   3187               []>;
   3188 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
          // Prints PTX "tex.acube.v4.s32.f32": four Int32Regs results from Int64Regs
          // $t, Int32Regs $l and Float32Regs $x/$y/$z, printed as the 4-entry
          // vector {$l, $x, $y, $z}. No selection pattern ([]) is attached.
   3189   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3190                     Int32Regs:$b, Int32Regs:$a),
   3191               (ins Int64Regs:$t, Int32Regs:$l,
   3192                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3193               "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3194               "[$t, \\{$l, $x, $y, $z\\}];",
   3195               []>;
   3196 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
          // Prints PTX "tex.level.acube.v4.s32.f32": four Int32Regs results from $t,
          // Int32Regs $l, Float32Regs $x/$y/$z, plus Float32Regs $lod printed after
          // the bracketed address. No selection pattern ([]) is attached.
   3197   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3198                     Int32Regs:$b, Int32Regs:$a),
   3199               (ins Int64Regs:$t, Int32Regs:$l,
   3200                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3201                    Float32Regs:$lod),
   3202               "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
   3203               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
   3204               []>;
   3205 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
          // Prints PTX "tex.acube.v4.u32.f32": four Int32Regs results from Int64Regs
          // $t, Int32Regs $l and Float32Regs $x/$y/$z, printed as the 4-entry
          // vector {$l, $x, $y, $z}. No selection pattern ([]) is attached.
   3206   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3207                     Int32Regs:$b, Int32Regs:$a),
   3208               (ins Int64Regs:$t, Int32Regs:$l,
   3209                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
   3210               "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3211               "[$t, \\{$l, $x, $y, $z\\}];",
   3212               []>;
   3213 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
          // Prints PTX "tex.level.acube.v4.u32.f32": four Int32Regs results from $t,
          // Int32Regs $l, Float32Regs $x/$y/$z, plus Float32Regs $lod printed after
          // the bracketed address. No selection pattern ([]) is attached.
   3214   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
   3215                     Int32Regs:$b, Int32Regs:$a),
   3216               (ins Int64Regs:$t, Int32Regs:$l,
   3217                    Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
   3218                    Float32Regs:$lod),
   3219               "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
   3220               "[$t, \\{$l, $x, $y, $z\\}], $lod;",
   3221               []>;
   3222 
   3223 def TLD4_UNIFIED_R_2D_F32_F32
          // Prints PTX "tld4.r.2d.v4.f32.f32": four Float32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".r" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3224   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3225                     Float32Regs:$v2, Float32Regs:$v3),
   3226               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3227               "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3228               "[$t, \\{$x, $y\\}];",
   3229               []>;
   3230 def TLD4_UNIFIED_G_2D_F32_F32
          // Prints PTX "tld4.g.2d.v4.f32.f32": four Float32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".g" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3231   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3232                     Float32Regs:$v2, Float32Regs:$v3),
   3233               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3234               "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3235               "[$t, \\{$x, $y\\}];",
   3236               []>;
   3237 def TLD4_UNIFIED_B_2D_F32_F32
          // Prints PTX "tld4.b.2d.v4.f32.f32": four Float32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".b" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3238   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3239                     Float32Regs:$v2, Float32Regs:$v3),
   3240               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3241               "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3242               "[$t, \\{$x, $y\\}];",
   3243               []>;
   3244 def TLD4_UNIFIED_A_2D_F32_F32
          // Prints PTX "tld4.a.2d.v4.f32.f32": four Float32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".a" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3245   : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
   3246                     Float32Regs:$v2, Float32Regs:$v3),
   3247               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3248               "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3249               "[$t, \\{$x, $y\\}];",
   3250               []>;
   3251 def TLD4_UNIFIED_R_2D_S32_F32
          // Prints PTX "tld4.r.2d.v4.s32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".r" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3252   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3253                     Int32Regs:$v2, Int32Regs:$v3),
   3254               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3255               "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3256               "[$t, \\{$x, $y\\}];",
   3257               []>;
   3258 def TLD4_UNIFIED_G_2D_S32_F32
          // Prints PTX "tld4.g.2d.v4.s32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".g" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3259   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3260                     Int32Regs:$v2, Int32Regs:$v3),
   3261               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3262               "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3263               "[$t, \\{$x, $y\\}];",
   3264               []>;
   3265 def TLD4_UNIFIED_B_2D_S32_F32
          // Prints PTX "tld4.b.2d.v4.s32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".b" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3266   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3267                     Int32Regs:$v2, Int32Regs:$v3),
   3268               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3269               "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3270               "[$t, \\{$x, $y\\}];",
   3271               []>;
   3272 def TLD4_UNIFIED_A_2D_S32_F32
          // Prints PTX "tld4.a.2d.v4.s32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".a" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3273   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3274                     Int32Regs:$v2, Int32Regs:$v3),
   3275               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3276               "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3277               "[$t, \\{$x, $y\\}];",
   3278               []>;
   3279 def TLD4_UNIFIED_R_2D_U32_F32
          // Prints PTX "tld4.r.2d.v4.u32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".r" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3280   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3281                     Int32Regs:$v2, Int32Regs:$v3),
   3282               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3283               "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3284               "[$t, \\{$x, $y\\}];",
   3285               []>;
   3286 def TLD4_UNIFIED_G_2D_U32_F32
          // Prints PTX "tld4.g.2d.v4.u32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".g" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3287   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3288                     Int32Regs:$v2, Int32Regs:$v3),
   3289               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3290               "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3291               "[$t, \\{$x, $y\\}];",
   3292               []>;
   3293 def TLD4_UNIFIED_B_2D_U32_F32
          // Prints PTX "tld4.b.2d.v4.u32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".b" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3294   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3295                     Int32Regs:$v2, Int32Regs:$v3),
   3296               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3297               "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3298               "[$t, \\{$x, $y\\}];",
   3299               []>;
   3300 def TLD4_UNIFIED_A_2D_U32_F32
          // Prints PTX "tld4.a.2d.v4.u32.f32": four Int32Regs results ($v0..$v3)
          // from Int64Regs $t and Float32Regs $x/$y. The ".a" selector is fixed in
          // the mnemonic string. No selection pattern ([]) is attached.
   3301   : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
   3302                     Int32Regs:$v2, Int32Regs:$v3),
   3303               (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
   3304               "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
   3305               "[$t, \\{$x, $y\\}];",
   3306               []>;
   3307 }
   3308 
   3309 
   3310 
   3311 //=== Surface load instructions
   3312 // .clamp variant
   3313 let IsSuld = 1 in {
        // Scalar "suld.b.*.clamp" defs: every record inside these braces gets
        // IsSuld = 1 and declares exactly one output register ($r). Each takes an
        // Int64Regs operand $s plus Int32Regs coordinates, and none attaches a
        // selection pattern ([]). The b8 forms return their result in Int16Regs,
        // the same class the b16 forms use.
        // NOTE(review): the IsSuld field is declared outside this chunk -- confirm
        // what the value 1 encodes before relying on it.
   3314 def SULD_1D_I8_CLAMP
        // 1D family (b8/b16/b32/b64): address is printed as [$s, {$x}].
   3315   : NVPTXInst<(outs Int16Regs:$r),
   3316               (ins Int64Regs:$s, Int32Regs:$x),
   3317               "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3318               []>;
   3319 def SULD_1D_I16_CLAMP
   3320   : NVPTXInst<(outs Int16Regs:$r),
   3321               (ins Int64Regs:$s, Int32Regs:$x),
   3322               "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3323               []>;
   3324 def SULD_1D_I32_CLAMP
   3325   : NVPTXInst<(outs Int32Regs:$r),
   3326               (ins Int64Regs:$s, Int32Regs:$x),
   3327               "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3328               []>;
   3329 def SULD_1D_I64_CLAMP
   3330   : NVPTXInst<(outs Int64Regs:$r),
   3331               (ins Int64Regs:$s, Int32Regs:$x),
   3332               "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
   3333               []>;
   3334 
   3335 def SULD_1D_ARRAY_I8_CLAMP
        // 1D array family: extra Int32Regs $l is printed first, [$s, {$l, $x}].
   3336   : NVPTXInst<(outs Int16Regs:$r),
   3337               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3338               "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3339               []>;
   3340 def SULD_1D_ARRAY_I16_CLAMP
   3341   : NVPTXInst<(outs Int16Regs:$r),
   3342               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3343               "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3344               []>;
   3345 def SULD_1D_ARRAY_I32_CLAMP
   3346   : NVPTXInst<(outs Int32Regs:$r),
   3347               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3348               "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3349               []>;
   3350 def SULD_1D_ARRAY_I64_CLAMP
   3351   : NVPTXInst<(outs Int64Regs:$r),
   3352               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3353               "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
   3354               []>;
   3355 
   3356 def SULD_2D_I8_CLAMP
        // 2D family: address is printed as [$s, {$x, $y}].
   3357   : NVPTXInst<(outs Int16Regs:$r),
   3358               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3359               "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3360               []>;
   3361 def SULD_2D_I16_CLAMP
   3362   : NVPTXInst<(outs Int16Regs:$r),
   3363               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3364               "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3365               []>;
   3366 def SULD_2D_I32_CLAMP
   3367   : NVPTXInst<(outs Int32Regs:$r),
   3368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3369               "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3370               []>;
   3371 def SULD_2D_I64_CLAMP
   3372   : NVPTXInst<(outs Int64Regs:$r),
   3373               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3374               "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
   3375               []>;
   3376 
   3377 def SULD_2D_ARRAY_I8_CLAMP
        // 2D array family: three coordinate operands ($l, $x, $y); $y is printed
        // twice so the brace vector has four entries (looks like padding --
        // confirm against the PTX ISA).
   3378   : NVPTXInst<(outs Int16Regs:$r),
   3379               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3380               "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3381               []>;
   3382 def SULD_2D_ARRAY_I16_CLAMP
   3383   : NVPTXInst<(outs Int16Regs:$r),
   3384               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3385               "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3386               []>;
   3387 def SULD_2D_ARRAY_I32_CLAMP
   3388   : NVPTXInst<(outs Int32Regs:$r),
   3389               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3390               "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3391               []>;
   3392 def SULD_2D_ARRAY_I64_CLAMP
   3393   : NVPTXInst<(outs Int64Regs:$r),
   3394               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3395               "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3396               []>;
   3397 
   3398 def SULD_3D_I8_CLAMP
        // 3D family: three coordinate operands ($x, $y, $z); $z is printed twice
        // so the brace vector has four entries.
   3399   : NVPTXInst<(outs Int16Regs:$r),
   3400               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3401               "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3402               []>;
   3403 def SULD_3D_I16_CLAMP
   3404   : NVPTXInst<(outs Int16Regs:$r),
   3405               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3406               "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3407               []>;
   3408 def SULD_3D_I32_CLAMP
   3409   : NVPTXInst<(outs Int32Regs:$r),
   3410               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3411               "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3412               []>;
   3413 def SULD_3D_I64_CLAMP
   3414   : NVPTXInst<(outs Int64Regs:$r),
   3415               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3416               "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3417               []>;
   3418 }
   3419 
   3420 let IsSuld = 2 in {
        // Two-element ("v2") "suld.b.*.clamp" defs: every record inside these
        // braces gets IsSuld = 2 and declares two output registers ($r, $g) of the
        // same class. Each takes an Int64Regs operand $s plus Int32Regs
        // coordinates, and none attaches a selection pattern ([]). The b8 forms
        // return their results in Int16Regs, the same class the b16 forms use.
        // NOTE(review): the IsSuld field is declared outside this chunk -- confirm
        // what the value 2 encodes before relying on it.
   3421 def SULD_1D_V2I8_CLAMP
        // 1D family (b8/b16/b32/b64): address is printed as [$s, {$x}].
   3422   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3423               (ins Int64Regs:$s, Int32Regs:$x),
   3424               "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3425               []>;
   3426 def SULD_1D_V2I16_CLAMP
   3427   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3428               (ins Int64Regs:$s, Int32Regs:$x),
   3429               "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3430               []>;
   3431 def SULD_1D_V2I32_CLAMP
   3432   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3433               (ins Int64Regs:$s, Int32Regs:$x),
   3434               "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3435               []>;
   3436 def SULD_1D_V2I64_CLAMP
   3437   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3438               (ins Int64Regs:$s, Int32Regs:$x),
   3439               "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
   3440               []>;
   3441 
   3442 def SULD_1D_ARRAY_V2I8_CLAMP
        // 1D array family: extra Int32Regs $l is printed first, [$s, {$l, $x}].
   3443   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3444               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3445               "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3446               []>;
   3447 def SULD_1D_ARRAY_V2I16_CLAMP
   3448   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3449               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3450               "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3451               []>;
   3452 def SULD_1D_ARRAY_V2I32_CLAMP
   3453   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3454               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3455               "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3456               []>;
   3457 def SULD_1D_ARRAY_V2I64_CLAMP
   3458   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3459               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3460               "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3461               []>;
   3462 
   3463 def SULD_2D_V2I8_CLAMP
        // 2D family: address is printed as [$s, {$x, $y}].
   3464   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3465               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3466               "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3467               []>;
   3468 def SULD_2D_V2I16_CLAMP
   3469   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3470               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3471               "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3472               []>;
   3473 def SULD_2D_V2I32_CLAMP
   3474   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3475               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3476               "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3477               []>;
   3478 def SULD_2D_V2I64_CLAMP
   3479   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3480               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3481               "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3482               []>;
   3483 
   3484 def SULD_2D_ARRAY_V2I8_CLAMP
        // 2D array family: three coordinate operands ($l, $x, $y); $y is printed
        // twice so the brace vector has four entries (looks like padding --
        // confirm against the PTX ISA).
   3485   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3486               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3487               "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
   3488               "[$s, \\{$l, $x, $y, $y\\}];",
   3489               []>;
   3490 def SULD_2D_ARRAY_V2I16_CLAMP
   3491   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3492               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3493               "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
   3494               "[$s, \\{$l, $x, $y, $y\\}];",
   3495               []>;
   3496 def SULD_2D_ARRAY_V2I32_CLAMP
   3497   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3498               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3499               "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
   3500               "[$s, \\{$l, $x, $y, $y\\}];",
   3501               []>;
   3502 def SULD_2D_ARRAY_V2I64_CLAMP
   3503   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3504               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3505               "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
   3506               "[$s, \\{$l, $x, $y, $y\\}];",
   3507               []>;
   3508 
   3509 def SULD_3D_V2I8_CLAMP
        // 3D family: three coordinate operands ($x, $y, $z); $z is printed twice
        // so the brace vector has four entries.
   3510   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3511               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3512               "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3513               []>;
   3514 def SULD_3D_V2I16_CLAMP
   3515   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3516               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3517               "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3518               []>;
   3519 def SULD_3D_V2I32_CLAMP
   3520   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3521               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3522               "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3523               []>;
   3524 def SULD_3D_V2I64_CLAMP
   3525   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3526               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3527               "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3528               []>;
   3529 }
   3530 
   3531 let IsSuld = 3 in {
   3532 def SULD_1D_V4I8_CLAMP
          // Prints PTX "suld.b.1d.v4.b8.clamp": four Int16Regs results ($r, $g, $b,
          // $a) from Int64Regs $s at {$x}. The b8 form uses the same Int16Regs
          // class as the b16 form. No selection pattern ([]) is attached.
   3533   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3534               (ins Int64Regs:$s, Int32Regs:$x),
   3535               "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3536               []>;
   3537 def SULD_1D_V4I16_CLAMP
          // Prints PTX "suld.b.1d.v4.b16.clamp": four Int16Regs results ($r, $g, $b,
          // $a) from Int64Regs $s at {$x}. No selection pattern ([]) is attached.
   3538   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3539               (ins Int64Regs:$s, Int32Regs:$x),
   3540               "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3541               []>;
   3542 def SULD_1D_V4I32_CLAMP
          // Prints PTX "suld.b.1d.v4.b32.clamp": four Int32Regs results ($r, $g, $b,
          // $a) from Int64Regs $s at {$x}. No selection pattern ([]) is attached.
   3543   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3544               (ins Int64Regs:$s, Int32Regs:$x),
   3545               "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3546               []>;
   3547 
   3548 def SULD_1D_ARRAY_V4I8_CLAMP
          // Prints PTX "suld.b.a1d.v4.b8.clamp": four Int16Regs results from
          // Int64Regs $s at {$l, $x} (both Int32Regs, $l printed first). The b8 form
          // uses the same Int16Regs class as b16. No pattern ([]) is attached.
   3549   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3550               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3551               "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3552               "[$s, \\{$l, $x\\}];",
   3553               []>;
   3554 def SULD_1D_ARRAY_V4I16_CLAMP
   3555   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3556               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3557               "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3558               "[$s, \\{$l, $x\\}];",
   3559               []>;
   3560 def SULD_1D_ARRAY_V4I32_CLAMP
   3561   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3562               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3563               "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3564               "[$s, \\{$l, $x\\}];",
   3565               []>;
   3566 
   3567 def SULD_2D_V4I8_CLAMP
   3568   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3569               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3570               "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3571               []>;
   3572 def SULD_2D_V4I16_CLAMP
   3573   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3574               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3575               "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3576               []>;
   3577 def SULD_2D_V4I32_CLAMP
   3578   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3579               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3580               "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3581               []>;
   3582 
   3583 def SULD_2D_ARRAY_V4I8_CLAMP
   3584   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3585               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3586               "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3587               "[$s, \\{$l, $x, $y, $y\\}];",
   3588               []>;
   3589 def SULD_2D_ARRAY_V4I16_CLAMP
   3590   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3591               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3592               "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3593               "[$s, \\{$l, $x, $y, $y\\}];",
   3594               []>;
   3595 def SULD_2D_ARRAY_V4I32_CLAMP
   3596   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3597               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3598               "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3599               "[$s, \\{$l, $x, $y, $y\\}];",
   3600               []>;
   3601 
   3602 
   3603 def SULD_3D_V4I8_CLAMP
   3604   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3605               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3606               "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
   3607               "[$s, \\{$x, $y, $z, $z\\}];",
   3608               []>;
   3609 def SULD_3D_V4I16_CLAMP
   3610   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3611               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3612               "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
   3613               "[$s, \\{$x, $y, $z, $z\\}];",
   3614               []>;
   3615 def SULD_3D_V4I32_CLAMP
   3616   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3617               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3618               "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
   3619               "[$s, \\{$x, $y, $z, $z\\}];",
   3620               []>;
   3621 }
   3622 
   3623 
   3624 // .trap variant
   3625 let IsSuld = 1 in {
        // Single-result surface-load instructions whose PTX mnemonic ends in
        // `.trap`. Every def in this group receives IsSuld = 1 from the enclosing
        // `let`; the consumer of that field is not visible in this chunk
        // (presumably it encodes the result-vector width -- confirm in the
        // instruction format definitions).
        //
        // Properties shared by every def below:
        //  * The single result is $r. The b8 and b16 forms both use Int16Regs,
        //    b32 uses Int32Regs and b64 uses Int64Regs.
        //  * $s is an Int64Regs input (presumably the surface handle -- confirm);
        //    all remaining inputs are Int32Regs.
        //  * `\\{` and `\\}` are escaped braces, so the printed assembly contains
        //    literal `{` and `}`.
        //  * The pattern list is empty ([]), so no selection pattern is attached
        //    to these defs here.

        // 1D: a single coordinate $x.
   3626 def SULD_1D_I8_TRAP
   3627   : NVPTXInst<(outs Int16Regs:$r),
   3628               (ins Int64Regs:$s, Int32Regs:$x),
   3629               "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
   3630               []>;
   3631 def SULD_1D_I16_TRAP
   3632   : NVPTXInst<(outs Int16Regs:$r),
   3633               (ins Int64Regs:$s, Int32Regs:$x),
   3634               "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
   3635               []>;
   3636 def SULD_1D_I32_TRAP
   3637   : NVPTXInst<(outs Int32Regs:$r),
   3638               (ins Int64Regs:$s, Int32Regs:$x),
   3639               "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
   3640               []>;
   3641 def SULD_1D_I64_TRAP
   3642   : NVPTXInst<(outs Int64Regs:$r),
   3643               (ins Int64Regs:$s, Int32Regs:$x),
   3644               "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
   3645               []>;
   3646 
        // 1D array (`a1d`): $l is printed before $x ($l is presumably the array
        // layer index -- confirm).
   3647 def SULD_1D_ARRAY_I8_TRAP
   3648   : NVPTXInst<(outs Int16Regs:$r),
   3649               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3650               "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3651               []>;
   3652 def SULD_1D_ARRAY_I16_TRAP
   3653   : NVPTXInst<(outs Int16Regs:$r),
   3654               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3655               "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3656               []>;
   3657 def SULD_1D_ARRAY_I32_TRAP
   3658   : NVPTXInst<(outs Int32Regs:$r),
   3659               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3660               "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3661               []>;
   3662 def SULD_1D_ARRAY_I64_TRAP
   3663   : NVPTXInst<(outs Int64Regs:$r),
   3664               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3665               "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
   3666               []>;
   3667 
        // 2D: coordinates $x, $y.
   3668 def SULD_2D_I8_TRAP
   3669   : NVPTXInst<(outs Int16Regs:$r),
   3670               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3671               "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   3672               []>;
   3673 def SULD_2D_I16_TRAP
   3674   : NVPTXInst<(outs Int16Regs:$r),
   3675               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3676               "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   3677               []>;
   3678 def SULD_2D_I32_TRAP
   3679   : NVPTXInst<(outs Int32Regs:$r),
   3680               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3681               "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   3682               []>;
   3683 def SULD_2D_I64_TRAP
   3684   : NVPTXInst<(outs Int64Regs:$r),
   3685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3686               "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
   3687               []>;
   3688 
        // 2D array (`a2d`): $l, $x, $y are printed, then $y is printed again so
        // the coordinate vector has four elements (presumably padding required by
        // PTX -- confirm against the PTX ISA).
   3689 def SULD_2D_ARRAY_I8_TRAP
   3690   : NVPTXInst<(outs Int16Regs:$r),
   3691               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3692               "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3693               []>;
   3694 def SULD_2D_ARRAY_I16_TRAP
   3695   : NVPTXInst<(outs Int16Regs:$r),
   3696               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3697               "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3698               []>;
   3699 def SULD_2D_ARRAY_I32_TRAP
   3700   : NVPTXInst<(outs Int32Regs:$r),
   3701               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3702               "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3703               []>;
   3704 def SULD_2D_ARRAY_I64_TRAP
   3705   : NVPTXInst<(outs Int64Regs:$r),
   3706               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3707               "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   3708               []>;
   3709 
        // 3D: $x, $y, $z are printed, then $z is printed again so the coordinate
        // vector has four elements (presumably padding -- confirm).
   3710 def SULD_3D_I8_TRAP
   3711   : NVPTXInst<(outs Int16Regs:$r),
   3712               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3713               "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3714               []>;
   3715 def SULD_3D_I16_TRAP
   3716   : NVPTXInst<(outs Int16Regs:$r),
   3717               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3718               "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3719               []>;
   3720 def SULD_3D_I32_TRAP
   3721   : NVPTXInst<(outs Int32Regs:$r),
   3722               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3723               "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3724               []>;
   3725 def SULD_3D_I64_TRAP
   3726   : NVPTXInst<(outs Int64Regs:$r),
   3727               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3728               "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3729               []>;
   3730 }
   3731 
   3732 let IsSuld = 2 in {
        // Two-result (`v2`) surface-load instructions whose PTX mnemonic ends in
        // `.trap`. Every def in this group receives IsSuld = 2 from the enclosing
        // `let`; the consumer of that field is not visible in this chunk
        // (presumably it encodes the result-vector width -- confirm in the
        // instruction format definitions).
        //
        // Properties shared by every def below:
        //  * Results are $r and $g. The b8 and b16 forms both use Int16Regs,
        //    b32 uses Int32Regs and b64 uses Int64Regs.
        //  * $s is an Int64Regs input (presumably the surface handle -- confirm);
        //    all remaining inputs are Int32Regs.
        //  * `\\{` and `\\}` are escaped braces, so the printed assembly contains
        //    literal `{` and `}`.
        //  * The pattern list is empty ([]), so no selection pattern is attached
        //    to these defs here.

        // 1D: a single coordinate $x.
   3733 def SULD_1D_V2I8_TRAP
   3734   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3735               (ins Int64Regs:$s, Int32Regs:$x),
   3736               "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   3737               []>;
   3738 def SULD_1D_V2I16_TRAP
   3739   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3740               (ins Int64Regs:$s, Int32Regs:$x),
   3741               "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   3742               []>;
   3743 def SULD_1D_V2I32_TRAP
   3744   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3745               (ins Int64Regs:$s, Int32Regs:$x),
   3746               "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   3747               []>;
   3748 def SULD_1D_V2I64_TRAP
   3749   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3750               (ins Int64Regs:$s, Int32Regs:$x),
   3751               "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
   3752               []>;
   3753 
        // 1D array (`a1d`): $l is printed before $x ($l is presumably the array
        // layer index -- confirm).
   3754 def SULD_1D_ARRAY_V2I8_TRAP
   3755   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3756               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3757               "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3758               []>;
   3759 def SULD_1D_ARRAY_V2I16_TRAP
   3760   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3761               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3762               "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3763               []>;
   3764 def SULD_1D_ARRAY_V2I32_TRAP
   3765   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3766               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3767               "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3768               []>;
   3769 def SULD_1D_ARRAY_V2I64_TRAP
   3770   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3771               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3772               "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   3773               []>;
   3774 
        // 2D: coordinates $x, $y.
   3775 def SULD_2D_V2I8_TRAP
   3776   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3777               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3778               "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3779               []>;
   3780 def SULD_2D_V2I16_TRAP
   3781   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3782               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3783               "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3784               []>;
   3785 def SULD_2D_V2I32_TRAP
   3786   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3787               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3788               "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3789               []>;
   3790 def SULD_2D_V2I64_TRAP
   3791   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3792               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3793               "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   3794               []>;
   3795 
        // 2D array (`a2d`): $l, $x, $y are printed, then $y is printed again so
        // the coordinate vector has four elements (presumably padding required by
        // PTX -- confirm against the PTX ISA).
   3796 def SULD_2D_ARRAY_V2I8_TRAP
   3797   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3798               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3799               "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
   3800               "[$s, \\{$l, $x, $y, $y\\}];",
   3801               []>;
   3802 def SULD_2D_ARRAY_V2I16_TRAP
   3803   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3804               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3805               "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
   3806               "[$s, \\{$l, $x, $y, $y\\}];",
   3807               []>;
   3808 def SULD_2D_ARRAY_V2I32_TRAP
   3809   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3810               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3811               "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
   3812               "[$s, \\{$l, $x, $y, $y\\}];",
   3813               []>;
   3814 def SULD_2D_ARRAY_V2I64_TRAP
   3815   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3816               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3817               "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
   3818               "[$s, \\{$l, $x, $y, $y\\}];",
   3819               []>;
   3820 
        // 3D: $x, $y, $z are printed, then $z is printed again so the coordinate
        // vector has four elements (presumably padding -- confirm).
   3821 def SULD_3D_V2I8_TRAP
   3822   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3823               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3824               "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3825               []>;
   3826 def SULD_3D_V2I16_TRAP
   3827   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   3828               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3829               "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3830               []>;
   3831 def SULD_3D_V2I32_TRAP
   3832   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   3833               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3834               "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3835               []>;
   3836 def SULD_3D_V2I64_TRAP
   3837   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   3838               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3839               "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   3840               []>;
   3841 }
   3842 
   3843 let IsSuld = 3 in {
        // Four-result (`v4`) surface-load instructions whose PTX mnemonic ends in
        // `.trap`. Every def in this group receives IsSuld = 3 from the enclosing
        // `let`; the consumer of that field is not visible in this chunk
        // (presumably it encodes the result-vector width -- confirm in the
        // instruction format definitions).
        //
        // Properties shared by every def below:
        //  * Results are $r, $g, $b, $a. The b8 and b16 forms both use Int16Regs;
        //    the b32 forms use Int32Regs. No b64 form is defined in this group.
        //  * $s is an Int64Regs input (presumably the surface handle -- confirm);
        //    all remaining inputs are Int32Regs.
        //  * `\\{` and `\\}` are escaped braces, so the printed assembly contains
        //    literal `{` and `}`.
        //  * The pattern list is empty ([]), so no selection pattern is attached
        //    to these defs here.

        // 1D: a single coordinate $x.
   3844 def SULD_1D_V4I8_TRAP
   3845   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3846               (ins Int64Regs:$s, Int32Regs:$x),
   3847               "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3848               []>;
   3849 def SULD_1D_V4I16_TRAP
   3850   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3851               (ins Int64Regs:$s, Int32Regs:$x),
   3852               "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3853               []>;
   3854 def SULD_1D_V4I32_TRAP
   3855   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3856               (ins Int64Regs:$s, Int32Regs:$x),
   3857               "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   3858               []>;
   3859 
        // 1D array (`a1d`): $l is printed before $x ($l is presumably the array
        // layer index -- confirm).
   3860 def SULD_1D_ARRAY_V4I8_TRAP
   3861   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3862               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3863               "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   3864               "[$s, \\{$l, $x\\}];",
   3865               []>;
   3866 def SULD_1D_ARRAY_V4I16_TRAP
   3867   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3868               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3869               "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   3870               "[$s, \\{$l, $x\\}];",
   3871               []>;
   3872 def SULD_1D_ARRAY_V4I32_TRAP
   3873   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3874               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3875               "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   3876               "[$s, \\{$l, $x\\}];",
   3877               []>;
   3878 
        // 2D: coordinates $x, $y.
   3879 def SULD_2D_V4I8_TRAP
   3880   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3881               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3882               "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3883               []>;
   3884 def SULD_2D_V4I16_TRAP
   3885   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3886               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3887               "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3888               []>;
   3889 def SULD_2D_V4I32_TRAP
   3890   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3891               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3892               "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   3893               []>;
   3894 
        // 2D array (`a2d`): $l, $x, $y are printed, then $y is printed again so
        // the coordinate vector has four elements (presumably padding required by
        // PTX -- confirm against the PTX ISA).
   3895 def SULD_2D_ARRAY_V4I8_TRAP
   3896   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3897               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3898               "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   3899               "[$s, \\{$l, $x, $y, $y\\}];",
   3900               []>;
   3901 def SULD_2D_ARRAY_V4I16_TRAP
   3902   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3903               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3904               "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   3905               "[$s, \\{$l, $x, $y, $y\\}];",
   3906               []>;
   3907 def SULD_2D_ARRAY_V4I32_TRAP
   3908   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3909               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   3910               "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   3911               "[$s, \\{$l, $x, $y, $y\\}];",
   3912               []>;
   3913 
   3914 
        // 3D: $x, $y, $z are printed, then $z is printed again so the coordinate
        // vector has four elements (presumably padding -- confirm).
   3915 def SULD_3D_V4I8_TRAP
   3916   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3917               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3918               "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
   3919               "[$s, \\{$x, $y, $z, $z\\}];",
   3920               []>;
   3921 def SULD_3D_V4I16_TRAP
   3922   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   3923               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3924               "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
   3925               "[$s, \\{$x, $y, $z, $z\\}];",
   3926               []>;
   3927 def SULD_3D_V4I32_TRAP
   3928   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   3929               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   3930               "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
   3931               "[$s, \\{$x, $y, $z, $z\\}];",
   3932               []>;
   3933 }
   3934 
   3935 // .zero variant
   3936 let IsSuld = 1 in {
        // Single-result surface-load instructions whose PTX mnemonic ends in
        // `.zero`. Every def in this group receives IsSuld = 1 from the enclosing
        // `let`; the consumer of that field is not visible in this chunk
        // (presumably it encodes the result-vector width -- confirm in the
        // instruction format definitions).
        //
        // Properties shared by every def below:
        //  * The single result is $r. The b8 and b16 forms both use Int16Regs,
        //    b32 uses Int32Regs and b64 uses Int64Regs.
        //  * $s is an Int64Regs input (presumably the surface handle -- confirm);
        //    all remaining inputs are Int32Regs.
        //  * `\\{` and `\\}` are escaped braces, so the printed assembly contains
        //    literal `{` and `}`.
        //  * The pattern list is empty ([]), so no selection pattern is attached
        //    to these defs here.

        // 1D: a single coordinate $x.
   3937 def SULD_1D_I8_ZERO
   3938   : NVPTXInst<(outs Int16Regs:$r),
   3939               (ins Int64Regs:$s, Int32Regs:$x),
   3940               "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
   3941               []>;
   3942 def SULD_1D_I16_ZERO
   3943   : NVPTXInst<(outs Int16Regs:$r),
   3944               (ins Int64Regs:$s, Int32Regs:$x),
   3945               "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
   3946               []>;
   3947 def SULD_1D_I32_ZERO
   3948   : NVPTXInst<(outs Int32Regs:$r),
   3949               (ins Int64Regs:$s, Int32Regs:$x),
   3950               "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
   3951               []>;
   3952 def SULD_1D_I64_ZERO
   3953   : NVPTXInst<(outs Int64Regs:$r),
   3954               (ins Int64Regs:$s, Int32Regs:$x),
   3955               "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
   3956               []>;
   3957 
        // 1D array (`a1d`): $l is printed before $x ($l is presumably the array
        // layer index -- confirm).
   3958 def SULD_1D_ARRAY_I8_ZERO
   3959   : NVPTXInst<(outs Int16Regs:$r),
   3960               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3961               "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   3962               []>;
   3963 def SULD_1D_ARRAY_I16_ZERO
   3964   : NVPTXInst<(outs Int16Regs:$r),
   3965               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3966               "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   3967               []>;
   3968 def SULD_1D_ARRAY_I32_ZERO
   3969   : NVPTXInst<(outs Int32Regs:$r),
   3970               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3971               "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   3972               []>;
   3973 def SULD_1D_ARRAY_I64_ZERO
   3974   : NVPTXInst<(outs Int64Regs:$r),
   3975               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   3976               "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
   3977               []>;
   3978 
        // 2D: coordinates $x, $y.
   3979 def SULD_2D_I8_ZERO
   3980   : NVPTXInst<(outs Int16Regs:$r),
   3981               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3982               "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   3983               []>;
   3984 def SULD_2D_I16_ZERO
   3985   : NVPTXInst<(outs Int16Regs:$r),
   3986               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3987               "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   3988               []>;
   3989 def SULD_2D_I32_ZERO
   3990   : NVPTXInst<(outs Int32Regs:$r),
   3991               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3992               "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   3993               []>;
   3994 def SULD_2D_I64_ZERO
   3995   : NVPTXInst<(outs Int64Regs:$r),
   3996               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   3997               "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
   3998               []>;
   3999 
        // 2D array (`a2d`): $l, $x, $y are printed, then $y is printed again so
        // the coordinate vector has four elements (presumably padding required by
        // PTX -- confirm against the PTX ISA).
   4000 def SULD_2D_ARRAY_I8_ZERO
   4001   : NVPTXInst<(outs Int16Regs:$r),
   4002               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4003               "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4004               []>;
   4005 def SULD_2D_ARRAY_I16_ZERO
   4006   : NVPTXInst<(outs Int16Regs:$r),
   4007               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4008               "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4009               []>;
   4010 def SULD_2D_ARRAY_I32_ZERO
   4011   : NVPTXInst<(outs Int32Regs:$r),
   4012               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4013               "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4014               []>;
   4015 def SULD_2D_ARRAY_I64_ZERO
   4016   : NVPTXInst<(outs Int64Regs:$r),
   4017               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4018               "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
   4019               []>;
   4020 
        // 3D: $x, $y, $z are printed, then $z is printed again so the coordinate
        // vector has four elements (presumably padding -- confirm).
   4021 def SULD_3D_I8_ZERO
   4022   : NVPTXInst<(outs Int16Regs:$r),
   4023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4024               "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4025               []>;
   4026 def SULD_3D_I16_ZERO
   4027   : NVPTXInst<(outs Int16Regs:$r),
   4028               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4029               "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4030               []>;
   4031 def SULD_3D_I32_ZERO
   4032   : NVPTXInst<(outs Int32Regs:$r),
   4033               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4034               "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4035               []>;
   4036 def SULD_3D_I64_ZERO
   4037   : NVPTXInst<(outs Int64Regs:$r),
   4038               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4039               "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4040               []>;
   4041 }
   4042 
   4043 let IsSuld = 2 in {
        // Two-result (`v2`) surface-load instructions whose PTX mnemonic ends in
        // `.zero`. Every def in this group receives IsSuld = 2 from the enclosing
        // `let`; the consumer of that field is not visible in this chunk
        // (presumably it encodes the result-vector width -- confirm in the
        // instruction format definitions).
        //
        // Properties shared by every def below:
        //  * Results are $r and $g. The b8 and b16 forms both use Int16Regs,
        //    b32 uses Int32Regs and b64 uses Int64Regs.
        //  * $s is an Int64Regs input (presumably the surface handle -- confirm);
        //    all remaining inputs are Int32Regs.
        //  * `\\{` and `\\}` are escaped braces, so the printed assembly contains
        //    literal `{` and `}`.
        //  * The pattern list is empty ([]), so no selection pattern is attached
        //    to these defs here.

        // 1D: a single coordinate $x.
   4044 def SULD_1D_V2I8_ZERO
   4045   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4046               (ins Int64Regs:$s, Int32Regs:$x),
   4047               "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4048               []>;
   4049 def SULD_1D_V2I16_ZERO
   4050   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4051               (ins Int64Regs:$s, Int32Regs:$x),
   4052               "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4053               []>;
   4054 def SULD_1D_V2I32_ZERO
   4055   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4056               (ins Int64Regs:$s, Int32Regs:$x),
   4057               "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4058               []>;
   4059 def SULD_1D_V2I64_ZERO
   4060   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4061               (ins Int64Regs:$s, Int32Regs:$x),
   4062               "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
   4063               []>;
   4064 
        // 1D array (`a1d`): $l is printed before $x ($l is presumably the array
        // layer index -- confirm).
   4065 def SULD_1D_ARRAY_V2I8_ZERO
   4066   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4067               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4068               "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4069               []>;
   4070 def SULD_1D_ARRAY_V2I16_ZERO
   4071   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4072               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4073               "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4074               []>;
   4075 def SULD_1D_ARRAY_V2I32_ZERO
   4076   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4077               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4078               "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4079               []>;
   4080 def SULD_1D_ARRAY_V2I64_ZERO
   4081   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4082               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4083               "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
   4084               []>;
   4085 
        // 2D: coordinates $x, $y.
   4086 def SULD_2D_V2I8_ZERO
   4087   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4088               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4089               "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4090               []>;
   4091 def SULD_2D_V2I16_ZERO
   4092   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4093               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4094               "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4095               []>;
   4096 def SULD_2D_V2I32_ZERO
   4097   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4098               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4099               "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4100               []>;
   4101 def SULD_2D_V2I64_ZERO
   4102   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4103               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4104               "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
   4105               []>;
   4106 
        // 2D array (`a2d`): $l, $x, $y are printed, then $y is printed again so
        // the coordinate vector has four elements (presumably padding required by
        // PTX -- confirm against the PTX ISA).
   4107 def SULD_2D_ARRAY_V2I8_ZERO
   4108   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4109               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4110               "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
   4111               "[$s, \\{$l, $x, $y, $y\\}];",
   4112               []>;
   4113 def SULD_2D_ARRAY_V2I16_ZERO
   4114   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4115               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4116               "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
   4117               "[$s, \\{$l, $x, $y, $y\\}];",
   4118               []>;
   4119 def SULD_2D_ARRAY_V2I32_ZERO
   4120   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4121               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4122               "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
   4123               "[$s, \\{$l, $x, $y, $y\\}];",
   4124               []>;
   4125 def SULD_2D_ARRAY_V2I64_ZERO
   4126   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4127               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4128               "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
   4129               "[$s, \\{$l, $x, $y, $y\\}];",
   4130               []>;
   4131 
        // 3D: $x, $y, $z are printed, then $z is printed again so the coordinate
        // vector has four elements (presumably padding -- confirm).
   4132 def SULD_3D_V2I8_ZERO
   4133   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4134               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4135               "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4136               []>;
   4137 def SULD_3D_V2I16_ZERO
   4138   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
   4139               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4140               "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4141               []>;
   4142 def SULD_3D_V2I32_ZERO
   4143   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
   4144               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4145               "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4146               []>;
   4147 def SULD_3D_V2I64_ZERO
   4148   : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
   4149               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4150               "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
   4151               []>;
   4152 }
   4153 
   4154 let IsSuld = 3 in {
   4155 def SULD_1D_V4I8_ZERO
        // Emits the PTX text `suld.b.1d.v4.b8.zero` with four results
        // ($r, $g, $b, $a); the b8 data is returned in Int16Regs. Inputs are $s
        // (Int64Regs; presumably the surface handle -- confirm) and the single
        // Int32Regs coordinate $x. Defined inside the `let IsSuld = 3 in {` group
        // opened just above. The pattern list is empty, so no selection pattern
        // is attached here.
   4156   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4157               (ins Int64Regs:$s, Int32Regs:$x),
   4158               "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4159               []>;
   4160 def SULD_1D_V4I16_ZERO
        // Emits the PTX text `suld.b.1d.v4.b16.zero` with four Int16Regs results
        // ($r, $g, $b, $a). Inputs are $s (Int64Regs; presumably the surface
        // handle -- confirm) and the single Int32Regs coordinate $x. Defined
        // inside the enclosing `let IsSuld = 3 in {` group. The pattern list is
        // empty, so no selection pattern is attached here.
   4161   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4162               (ins Int64Regs:$s, Int32Regs:$x),
   4163               "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4164               []>;
   4165 def SULD_1D_V4I32_ZERO
        // Emits the PTX text `suld.b.1d.v4.b32.zero` with four Int32Regs results
        // ($r, $g, $b, $a). Inputs are $s (Int64Regs; presumably the surface
        // handle -- confirm) and the single Int32Regs coordinate $x. Defined
        // inside the enclosing `let IsSuld = 3 in {` group. The pattern list is
        // empty, so no selection pattern is attached here.
   4166   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4167               (ins Int64Regs:$s, Int32Regs:$x),
   4168               "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
   4169               []>;
   4170 
   4171 def SULD_1D_ARRAY_V4I8_ZERO
        // Emits the PTX text `suld.b.a1d.v4.b8.zero` with four results
        // ($r, $g, $b, $a); the b8 data is returned in Int16Regs. Inputs are $s
        // (Int64Regs; presumably the surface handle -- confirm), then $l
        // (presumably the array layer index -- confirm) and $x, both Int32Regs.
        // The asm string is split over two adjacent literals that TableGen
        // concatenates. The pattern list is empty, so no selection pattern is
        // attached here.
   4172   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4173               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4174               "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4175               "[$s, \\{$l, $x\\}];",
   4176               []>;
   4177 def SULD_1D_ARRAY_V4I16_ZERO
        // Emits the PTX text `suld.b.a1d.v4.b16.zero` with four Int16Regs results
        // ($r, $g, $b, $a). Inputs are $s (Int64Regs; presumably the surface
        // handle -- confirm), then $l (presumably the array layer index --
        // confirm) and $x, both Int32Regs. The asm string is split over two
        // adjacent literals that TableGen concatenates. The pattern list is
        // empty, so no selection pattern is attached here.
   4178   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4179               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4180               "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4181               "[$s, \\{$l, $x\\}];",
   4182               []>;
   4183 def SULD_1D_ARRAY_V4I32_ZERO
   4184   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4185               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
   4186               "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4187               "[$s, \\{$l, $x\\}];",
   4188               []>;
   4189 
   4190 def SULD_2D_V4I8_ZERO
   4191   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4192               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4193               "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4194               []>;
   4195 def SULD_2D_V4I16_ZERO
   4196   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4197               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4198               "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4199               []>;
   4200 def SULD_2D_V4I32_ZERO
   4201   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4202               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
   4203               "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
   4204               []>;
   4205 
   4206 def SULD_2D_ARRAY_V4I8_ZERO
   4207   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4208               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4209               "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4210               "[$s, \\{$l, $x, $y, $y\\}];",
   4211               []>;
   4212 def SULD_2D_ARRAY_V4I16_ZERO
   4213   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4214               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4215               "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4216               "[$s, \\{$l, $x, $y, $y\\}];",
   4217               []>;
   4218 def SULD_2D_ARRAY_V4I32_ZERO
   4219   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4220               (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
   4221               "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4222               "[$s, \\{$l, $x, $y, $y\\}];",
   4223               []>;
   4224 
   4225 
   4226 def SULD_3D_V4I8_ZERO
   4227   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4228               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4229               "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
   4230               "[$s, \\{$x, $y, $z, $z\\}];",
   4231               []>;
   4232 def SULD_3D_V4I16_ZERO
   4233   : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4234               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4235               "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
   4236               "[$s, \\{$x, $y, $z, $z\\}];",
   4237               []>;
   4238 def SULD_3D_V4I32_ZERO
   4239   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4240               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
   4241               "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
   4242               "[$s, \\{$x, $y, $z, $z\\}];",
   4243               []>;
   4244 }
   4245 
   4246 //-----------------------------------
   4247 // Texture Query Intrinsics
   4248 //-----------------------------------
   4249 
   // Texture attribute queries (txq.*).  Each def has the same shape: one
   // Int64Regs input $a, printed in brackets as [$a], and one Int32Regs result
   // $d; only the queried attribute in the mnemonic differs.  The pattern lists
   // are empty here; the intrinsic-to-instruction mapping is supplied by the
   // separate Pat records that follow this group.
   // NOTE(review): IsSurfTexQuery = 1 presumably lets the C++ backend recognise
   // these as handle queries -- confirm against the NVPTXInst field definition.
   4250 let IsSurfTexQuery = 1 in {
   4251 def TXQ_CHANNEL_ORDER
   4252   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4253               "txq.channel_order.b32 \t$d, [$a];",
   4254               []>;
   4255 def TXQ_CHANNEL_DATA_TYPE
   4256   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4257               "txq.channel_data_type.b32 \t$d, [$a];",
   4258               []>;
   4259 def TXQ_WIDTH
   4260   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4261               "txq.width.b32 \t$d, [$a];",
   4262               []>;
   4263 def TXQ_HEIGHT
   4264   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4265               "txq.height.b32 \t$d, [$a];",
   4266               []>;
   4267 def TXQ_DEPTH
   4268   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4269               "txq.depth.b32 \t$d, [$a];",
   4270               []>;
   4271 def TXQ_ARRAY_SIZE
   4272   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4273               "txq.array_size.b32 \t$d, [$a];",
   4274               []>;
   4275 def TXQ_NUM_SAMPLES
   4276   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4277               "txq.num_samples.b32 \t$d, [$a];",
   4278               []>;
   4279 def TXQ_NUM_MIPMAP_LEVELS
   4280   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4281               "txq.num_mipmap_levels.b32 \t$d, [$a];",
   4282               []>;
   4283 }
   4284 
   // Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
   // applied to an Int64Regs handle $a is rewritten to the TXQ_* instruction of
   // the same name, with $a passed through unchanged.
   4285 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
   4286           (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
   4287 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
   4288           (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
   4289 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
   4290           (TXQ_WIDTH Int64Regs:$a)>;
   4291 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
   4292           (TXQ_HEIGHT Int64Regs:$a)>;
   4293 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
   4294           (TXQ_DEPTH Int64Regs:$a)>;
   4295 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
   4296           (TXQ_ARRAY_SIZE Int64Regs:$a)>;
   4297 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
   4298           (TXQ_NUM_SAMPLES Int64Regs:$a)>;
   4299 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
   4300           (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
   4301 
   4302 
   4303 //-----------------------------------
   4304 // Surface Query Intrinsics
   4305 //-----------------------------------
   4306 
   // Surface attribute queries (suq.*).  Each def has the same shape: one
   // Int64Regs input $a, printed in brackets as [$a], and one Int32Regs result
   // $d; only the queried attribute in the mnemonic differs.  This group has six
   // queries (channel order, channel data type, width, height, depth, array
   // size).  The pattern lists are empty here; the intrinsic-to-instruction
   // mapping is supplied by the separate Pat records that follow this group.
   // NOTE(review): IsSurfTexQuery = 1 presumably lets the C++ backend recognise
   // these as handle queries -- confirm against the NVPTXInst field definition.
   4307 let IsSurfTexQuery = 1 in {
   4308 def SUQ_CHANNEL_ORDER
   4309   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4310               "suq.channel_order.b32 \t$d, [$a];",
   4311               []>;
   4312 def SUQ_CHANNEL_DATA_TYPE
   4313   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4314               "suq.channel_data_type.b32 \t$d, [$a];",
   4315               []>;
   4316 def SUQ_WIDTH
   4317   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4318               "suq.width.b32 \t$d, [$a];",
   4319               []>;
   4320 def SUQ_HEIGHT
   4321   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4322               "suq.height.b32 \t$d, [$a];",
   4323               []>;
   4324 def SUQ_DEPTH
   4325   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4326               "suq.depth.b32 \t$d, [$a];",
   4327               []>;
   4328 def SUQ_ARRAY_SIZE
   4329   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
   4330               "suq.array_size.b32 \t$d, [$a];",
   4331               []>;
   4332 }
   4333 
   // Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
   // applied to an Int64Regs handle $a is rewritten to the SUQ_* instruction of
   // the same name, with $a passed through unchanged.
   4334 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
   4335           (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
   4336 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
   4337           (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
   4338 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
   4339           (SUQ_WIDTH Int64Regs:$a)>;
   4340 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
   4341           (SUQ_HEIGHT Int64Regs:$a)>;
   4342 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
   4343           (SUQ_DEPTH Int64Regs:$a)>;
   4344 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
   4345           (SUQ_ARRAY_SIZE Int64Regs:$a)>;
   4346 
   4347 
   4348 //===- Handle Query -------------------------------------------------------===//
   4349 
   4350 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
   // istypep.samplerref: takes an Int64Regs value $a (printed bare, without
   // brackets) and sets the Int1Regs result $d.  The selection pattern is
   // inline: it matches int_nvvm_istypep_sampler on $a.
   4351 def ISTYPEP_SAMPLER
   4352   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4353               "istypep.samplerref \t$d, $a;",
   4354               [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
   // istypep.surfref: takes an Int64Regs value $a (printed bare, without
   // brackets) and sets the Int1Regs result $d.  The selection pattern is
   // inline: it matches int_nvvm_istypep_surface on $a.
   4355 def ISTYPEP_SURFACE
   4356   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4357               "istypep.surfref \t$d, $a;",
   4358               [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
   // istypep.texref: takes an Int64Regs value $a (printed bare, without
   // brackets) and sets the Int1Regs result $d.  The selection pattern is
   // inline: it matches int_nvvm_istypep_texture on $a.
   4359 def ISTYPEP_TEXTURE
   4360   : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
   4361               "istypep.texref \t$d, $a;",
   4362               [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
   4363 
   4364 //===- Surface Stores -----------------------------------------------------===//
   4365 
   4366 let IsSust = 1 in {
   4367 // Unformatted
   4368 // .clamp variant
   // 1D surface stores, sust.b ... .clamp.  These defs produce no results (outs
   // is empty).  Inputs are the surface handle $s, the coordinate $x, and then
   // the data registers: one ($r) for the scalar forms, two ($r, $g) for .v2,
   // four ($r, $g, $b, $a) for .v4.  b8 and b16 data are both carried in
   // Int16Regs, b32 in Int32Regs, b64 in Int64Regs; there is no .v4.b64 form in
   // this group.  All pattern lists are empty.
   4369 def SUST_B_1D_B8_CLAMP
   4370   : NVPTXInst<(outs),
   4371               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4372               "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4373               []>;
   4374 def SUST_B_1D_B16_CLAMP
   4375   : NVPTXInst<(outs),
   4376               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4377               "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4378               []>;
   4379 def SUST_B_1D_B32_CLAMP
   4380   : NVPTXInst<(outs),
   4381               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   4382               "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4383               []>;
   4384 def SUST_B_1D_B64_CLAMP
   4385   : NVPTXInst<(outs),
   4386               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   4387               "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
   4388               []>;
   4389 def SUST_B_1D_V2B8_CLAMP
   4390   : NVPTXInst<(outs),
   4391               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4392               "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4393               []>;
   4394 def SUST_B_1D_V2B16_CLAMP
   4395   : NVPTXInst<(outs),
   4396               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4397               "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4398               []>;
   4399 def SUST_B_1D_V2B32_CLAMP
   4400   : NVPTXInst<(outs),
   4401               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   4402               "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4403               []>;
   4404 def SUST_B_1D_V2B64_CLAMP
   4405   : NVPTXInst<(outs),
   4406               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   4407               "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4408               []>;
   4409 def SUST_B_1D_V4B8_CLAMP
   4410   : NVPTXInst<(outs),
   4411               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4412                    Int16Regs:$b, Int16Regs:$a),
   4413               "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4414               []>;
   4415 def SUST_B_1D_V4B16_CLAMP
   4416   : NVPTXInst<(outs),
   4417               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4418                    Int16Regs:$b, Int16Regs:$a),
   4419               "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4420               []>;
   4421 def SUST_B_1D_V4B32_CLAMP
   4422   : NVPTXInst<(outs),
   4423               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   4424                    Int32Regs:$b, Int32Regs:$a),
   4425               "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4426               []>;
   4427 
   4428 
   // 1D surface-array stores, sust.b.a1d ... .clamp.  No results (outs is
   // empty).  Inputs are the surface handle $s, the layer index $idx, the
   // coordinate $x, and then one, two or four data registers for the scalar,
   // .v2 and .v4 forms.  b8 and b16 data are both carried in Int16Regs, b32 in
   // Int32Regs, b64 in Int64Regs; there is no .v4.b64 form in this group.  All
   // pattern lists are empty.
   4429 def SUST_B_1D_ARRAY_B8_CLAMP
   4430   : NVPTXInst<(outs),
   4431               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4432               "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4433               []>;
   4434 def SUST_B_1D_ARRAY_B16_CLAMP
   4435   : NVPTXInst<(outs),
   4436               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4437               "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4438               []>;
   4439 def SUST_B_1D_ARRAY_B32_CLAMP
   4440   : NVPTXInst<(outs),
   4441               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   4442               "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4443               []>;
   4444 def SUST_B_1D_ARRAY_B64_CLAMP
   4445   : NVPTXInst<(outs),
   4446               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   4447               "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4448               []>;
   4449 def SUST_B_1D_ARRAY_V2B8_CLAMP
   4450   : NVPTXInst<(outs),
   4451               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4452                    Int16Regs:$g),
   4453               "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4454               []>;
   4455 def SUST_B_1D_ARRAY_V2B16_CLAMP
   4456   : NVPTXInst<(outs),
   4457               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4458                    Int16Regs:$g),
   4459               "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4460               []>;
   4461 def SUST_B_1D_ARRAY_V2B32_CLAMP
   4462   : NVPTXInst<(outs),
   4463               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4464                    Int32Regs:$g),
   4465               "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4466               []>;
   4467 def SUST_B_1D_ARRAY_V2B64_CLAMP
   4468   : NVPTXInst<(outs),
   4469               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   4470                    Int64Regs:$g),
   4471               "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4472               []>;
   4473 def SUST_B_1D_ARRAY_V4B8_CLAMP
   4474   : NVPTXInst<(outs),
   4475               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4476                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4477               "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
   4478               "\\{$r, $g, $b, $a\\};",
   4479               []>;
   4480 def SUST_B_1D_ARRAY_V4B16_CLAMP
   4481   : NVPTXInst<(outs),
   4482               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4483                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4484              "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
   4485              "\\{$r, $g, $b, $a\\};",
   4486               []>;
   4487 def SUST_B_1D_ARRAY_V4B32_CLAMP
   4488   : NVPTXInst<(outs),
   4489               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4490                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4491              "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
   4492              "\\{$r, $g, $b, $a\\};",
   4493               []>;
   4494 
   4495 
   // 2D surface stores, sust.b.2d ... .clamp.  No results (outs is empty).
   // Inputs are the surface handle $s, the coordinates ($x, $y), and then one,
   // two or four data registers for the scalar, .v2 and .v4 forms.  b8 and b16
   // data are both carried in Int16Regs, b32 in Int32Regs, b64 in Int64Regs;
   // there is no .v4.b64 form in this group.  All pattern lists are empty.
   4496 def SUST_B_2D_B8_CLAMP
   4497   : NVPTXInst<(outs),
   4498               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4499               "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4500               []>;
   4501 def SUST_B_2D_B16_CLAMP
   4502   : NVPTXInst<(outs),
   4503               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4504               "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4505               []>;
   4506 def SUST_B_2D_B32_CLAMP
   4507   : NVPTXInst<(outs),
   4508               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   4509               "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4510               []>;
   4511 def SUST_B_2D_B64_CLAMP
   4512   : NVPTXInst<(outs),
   4513               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   4514               "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4515               []>;
   4516 def SUST_B_2D_V2B8_CLAMP
   4517   : NVPTXInst<(outs),
   4518               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4519                    Int16Regs:$g),
   4520               "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4521               []>;
   4522 def SUST_B_2D_V2B16_CLAMP
   4523   : NVPTXInst<(outs),
   4524               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4525                    Int16Regs:$g),
   4526               "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4527               []>;
   4528 def SUST_B_2D_V2B32_CLAMP
   4529   : NVPTXInst<(outs),
   4530               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4531                    Int32Regs:$g),
   4532               "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4533               []>;
   4534 def SUST_B_2D_V2B64_CLAMP
   4535   : NVPTXInst<(outs),
   4536               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   4537                    Int64Regs:$g),
   4538               "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4539               []>;
   4540 def SUST_B_2D_V4B8_CLAMP
   4541   : NVPTXInst<(outs),
   4542               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4543                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4544               "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
   4545               "\\{$r, $g, $b, $a\\};",
   4546               []>;
   4547 def SUST_B_2D_V4B16_CLAMP
   4548   : NVPTXInst<(outs),
   4549               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4550                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4551              "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
   4552              "\\{$r, $g, $b, $a\\};",
   4553               []>;
   4554 def SUST_B_2D_V4B32_CLAMP
   4555   : NVPTXInst<(outs),
   4556               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4557                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4558              "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
   4559              "\\{$r, $g, $b, $a\\};",
   4560               []>;
   4561 
   4562 
   // 2D surface-array stores, sust.b.a2d ... .clamp.  No results (outs is
   // empty).  Inputs are the surface handle $s, the layer index $idx, the
   // coordinates ($x, $y), and then one, two or four data registers.  The
   // coordinate vector in the asm string has four slots, {$idx, $x, $y, $y};
   // $y is repeated only to fill the last slot.  b8 and b16 data are both
   // carried in Int16Regs, b32 in Int32Regs, b64 in Int64Regs; there is no
   // .v4.b64 form in this group.  All pattern lists are empty.
   4563 def SUST_B_2D_ARRAY_B8_CLAMP
   4564   : NVPTXInst<(outs),
   4565               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4566                    Int16Regs:$r),
   4567               "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4568               []>;
   4569 def SUST_B_2D_ARRAY_B16_CLAMP
   4570   : NVPTXInst<(outs),
   4571               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4572                    Int16Regs:$r),
   4573               "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4574               []>;
   4575 def SUST_B_2D_ARRAY_B32_CLAMP
   4576   : NVPTXInst<(outs),
   4577               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4578                    Int32Regs:$r),
   4579               "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4580               []>;
   4581 def SUST_B_2D_ARRAY_B64_CLAMP
   4582   : NVPTXInst<(outs),
   4583               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4584                    Int64Regs:$r),
   4585               "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4586               []>;
   4587 def SUST_B_2D_ARRAY_V2B8_CLAMP
   4588   : NVPTXInst<(outs),
   4589               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4590                    Int16Regs:$r, Int16Regs:$g),
   4591               "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4592               "\\{$r, $g\\};",
   4593               []>;
   4594 def SUST_B_2D_ARRAY_V2B16_CLAMP
   4595   : NVPTXInst<(outs),
   4596               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4597                    Int16Regs:$r, Int16Regs:$g),
   4598              "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4599              "\\{$r, $g\\};",
   4600               []>;
   4601 def SUST_B_2D_ARRAY_V2B32_CLAMP
   4602   : NVPTXInst<(outs),
   4603               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4604                    Int32Regs:$r, Int32Regs:$g),
   4605              "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4606              "\\{$r, $g\\};",
   4607               []>;
   4608 def SUST_B_2D_ARRAY_V2B64_CLAMP
   4609   : NVPTXInst<(outs),
   4610               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4611                    Int64Regs:$r, Int64Regs:$g),
   4612              "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4613              "\\{$r, $g\\};",
   4614               []>;
   4615 def SUST_B_2D_ARRAY_V4B8_CLAMP
   4616   : NVPTXInst<(outs),
   4617               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4618                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4619       "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4620       "\\{$r, $g, $b, $a\\};",
   4621               []>;
   4622 def SUST_B_2D_ARRAY_V4B16_CLAMP
   4623   : NVPTXInst<(outs),
   4624               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4625                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4626      "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4627      "\\{$r, $g, $b, $a\\};",
   4628               []>;
   4629 def SUST_B_2D_ARRAY_V4B32_CLAMP
   4630   : NVPTXInst<(outs),
   4631               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4632                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4633      "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
   4634      "\\{$r, $g, $b, $a\\};",
   4635               []>;
   4636 
   4637 
   // 3D surface stores, sust.b.3d ... .clamp.  No results (outs is empty).
   // Inputs are the surface handle $s, the coordinates ($x, $y, $z), and then
   // one, two or four data registers.  The coordinate vector in the asm string
   // has four slots, {$x, $y, $z, $z}; $z is repeated only to fill the last
   // slot.  b8 and b16 data are both carried in Int16Regs, b32 in Int32Regs,
   // b64 in Int64Regs; there is no .v4.b64 form in this group.  All pattern
   // lists are empty.
   4638 def SUST_B_3D_B8_CLAMP
   4639   : NVPTXInst<(outs),
   4640               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4641                    Int16Regs:$r),
   4642               "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4643               []>;
   4644 def SUST_B_3D_B16_CLAMP
   4645   : NVPTXInst<(outs),
   4646               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4647                    Int16Regs:$r),
   4648               "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4649               []>;
   4650 def SUST_B_3D_B32_CLAMP
   4651   : NVPTXInst<(outs),
   4652               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4653                    Int32Regs:$r),
   4654               "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4655               []>;
   4656 def SUST_B_3D_B64_CLAMP
   4657   : NVPTXInst<(outs),
   4658               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4659                    Int64Regs:$r),
   4660               "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4661               []>;
   4662 def SUST_B_3D_V2B8_CLAMP
   4663   : NVPTXInst<(outs),
   4664               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4665                    Int16Regs:$r, Int16Regs:$g),
   4666               "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4667               "\\{$r, $g\\};",
   4668               []>;
   4669 def SUST_B_3D_V2B16_CLAMP
   4670   : NVPTXInst<(outs),
   4671               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4672                    Int16Regs:$r, Int16Regs:$g),
   4673               "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4674               "\\{$r, $g\\};",
   4675               []>;
   4676 def SUST_B_3D_V2B32_CLAMP
   4677   : NVPTXInst<(outs),
   4678               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4679                    Int32Regs:$r, Int32Regs:$g),
   4680               "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4681               "\\{$r, $g\\};",
   4682               []>;
   4683 def SUST_B_3D_V2B64_CLAMP
   4684   : NVPTXInst<(outs),
   4685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4686                    Int64Regs:$r, Int64Regs:$g),
   4687               "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4688               "\\{$r, $g\\};",
   4689               []>;
   4690 def SUST_B_3D_V4B8_CLAMP
   4691   : NVPTXInst<(outs),
   4692               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4693                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4694          "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4695          "\\{$r, $g, $b, $a\\};",
   4696               []>;
   4697 def SUST_B_3D_V4B16_CLAMP
   4698   : NVPTXInst<(outs),
   4699               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4700                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4701         "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4702         "\\{$r, $g, $b, $a\\};",
   4703               []>;
   4704 def SUST_B_3D_V4B32_CLAMP
   4705   : NVPTXInst<(outs),
   4706               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4707                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4708         "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
   4709         "\\{$r, $g, $b, $a\\};",
   4710               []>;
   4711 
   4712 
   4713 // .trap variant
   // 1D surface stores, sust.b.1d ... .trap.  Operand lists match the .clamp
   // defs of the same geometry; only the mnemonic suffix differs.  No results
   // (outs is empty).  Inputs are the surface handle $s, the coordinate $x, and
   // then one, two or four data registers for the scalar, .v2 and .v4 forms.
   // b8 and b16 data are both carried in Int16Regs, b32 in Int32Regs, b64 in
   // Int64Regs; there is no .v4.b64 form in this group.  All pattern lists are
   // empty.
   4714 def SUST_B_1D_B8_TRAP
   4715   : NVPTXInst<(outs),
   4716               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4717               "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
   4718               []>;
   4719 def SUST_B_1D_B16_TRAP
   4720   : NVPTXInst<(outs),
   4721               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   4722               "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
   4723               []>;
   4724 def SUST_B_1D_B32_TRAP
   4725   : NVPTXInst<(outs),
   4726               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   4727               "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
   4728               []>;
   4729 def SUST_B_1D_B64_TRAP
   4730   : NVPTXInst<(outs),
   4731               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   4732               "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
   4733               []>;
   4734 def SUST_B_1D_V2B8_TRAP
   4735   : NVPTXInst<(outs),
   4736               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4737               "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4738               []>;
   4739 def SUST_B_1D_V2B16_TRAP
   4740   : NVPTXInst<(outs),
   4741               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   4742               "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4743               []>;
   4744 def SUST_B_1D_V2B32_TRAP
   4745   : NVPTXInst<(outs),
   4746               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   4747               "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4748               []>;
   4749 def SUST_B_1D_V2B64_TRAP
   4750   : NVPTXInst<(outs),
   4751               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   4752               "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   4753               []>;
   4754 def SUST_B_1D_V4B8_TRAP
   4755   : NVPTXInst<(outs),
   4756               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4757                    Int16Regs:$b, Int16Regs:$a),
   4758               "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4759               []>;
   4760 def SUST_B_1D_V4B16_TRAP
   4761   : NVPTXInst<(outs),
   4762               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   4763                    Int16Regs:$b, Int16Regs:$a),
   4764               "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4765               []>;
   4766 def SUST_B_1D_V4B32_TRAP
   4767   : NVPTXInst<(outs),
   4768               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   4769                    Int32Regs:$b, Int32Regs:$a),
   4770               "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   4771               []>;
   4772 
   4773 
   // 1D surface-array stores, sust.b.a1d ... .trap.  Operand lists match the
   // .clamp defs of the same geometry; only the mnemonic suffix differs.  No
   // results (outs is empty).  Inputs are the surface handle $s, the layer
   // index $idx, the coordinate $x, and then one, two or four data registers.
   // b8 and b16 data are both carried in Int16Regs, b32 in Int32Regs, b64 in
   // Int64Regs; there is no .v4.b64 form in this group.  All pattern lists are
   // empty.
   4774 def SUST_B_1D_ARRAY_B8_TRAP
   4775   : NVPTXInst<(outs),
   4776               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4777               "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4778               []>;
   4779 def SUST_B_1D_ARRAY_B16_TRAP
   4780   : NVPTXInst<(outs),
   4781               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   4782               "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4783               []>;
   4784 def SUST_B_1D_ARRAY_B32_TRAP
   4785   : NVPTXInst<(outs),
   4786               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   4787               "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4788               []>;
   4789 def SUST_B_1D_ARRAY_B64_TRAP
   4790   : NVPTXInst<(outs),
   4791               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   4792               "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   4793               []>;
   4794 def SUST_B_1D_ARRAY_V2B8_TRAP
   4795   : NVPTXInst<(outs),
   4796               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4797                    Int16Regs:$g),
   4798               "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4799               []>;
   4800 def SUST_B_1D_ARRAY_V2B16_TRAP
   4801   : NVPTXInst<(outs),
   4802               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4803                    Int16Regs:$g),
   4804               "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4805               []>;
   4806 def SUST_B_1D_ARRAY_V2B32_TRAP
   4807   : NVPTXInst<(outs),
   4808               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4809                    Int32Regs:$g),
   4810               "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4811               []>;
   4812 def SUST_B_1D_ARRAY_V2B64_TRAP
   4813   : NVPTXInst<(outs),
   4814               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   4815                    Int64Regs:$g),
   4816               "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   4817               []>;
   4818 def SUST_B_1D_ARRAY_V4B8_TRAP
   4819   : NVPTXInst<(outs),
   4820               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4821                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4822               "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
   4823               "\\{$r, $g, $b, $a\\};",
   4824               []>;
   4825 def SUST_B_1D_ARRAY_V4B16_TRAP
   4826   : NVPTXInst<(outs),
   4827               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   4828                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4829              "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
   4830              "\\{$r, $g, $b, $a\\};",
   4831               []>;
   4832 def SUST_B_1D_ARRAY_V4B32_TRAP
   4833   : NVPTXInst<(outs),
   4834               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   4835                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4836              "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
   4837              "\\{$r, $g, $b, $a\\};",
   4838               []>;
   4839 
   4840 
   // sust.b.2d.*.trap records: scalar b8/b16/b32/b64, v2 of each width, and v4
   // for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), the coordinates $x and $y (Int32Regs),
   // printed as {$x, $y}, then the stored components $r[, $g[, $b, $a]].
   // Components are Int16Regs for b8/b16, Int32Regs for b32, Int64Regs for b64.
   // NOTE(review): presumably $s is the surface handle -- confirm against the
   // PTX `sust` documentation.
   4841 def SUST_B_2D_B8_TRAP
   4842   : NVPTXInst<(outs),
   4843               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4844               "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4845               []>;
   4846 def SUST_B_2D_B16_TRAP
   4847   : NVPTXInst<(outs),
   4848               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   4849               "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4850               []>;
   4851 def SUST_B_2D_B32_TRAP
   4852   : NVPTXInst<(outs),
   4853               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   4854               "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4855               []>;
   4856 def SUST_B_2D_B64_TRAP
   4857   : NVPTXInst<(outs),
   4858               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   4859               "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   4860               []>;
   4861 def SUST_B_2D_V2B8_TRAP
   4862   : NVPTXInst<(outs),
   4863               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4864                    Int16Regs:$g),
   4865               "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4866               []>;
   4867 def SUST_B_2D_V2B16_TRAP
   4868   : NVPTXInst<(outs),
   4869               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4870                    Int16Regs:$g),
   4871               "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4872               []>;
   4873 def SUST_B_2D_V2B32_TRAP
   4874   : NVPTXInst<(outs),
   4875               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4876                    Int32Regs:$g),
   4877               "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4878               []>;
   4879 def SUST_B_2D_V2B64_TRAP
   4880   : NVPTXInst<(outs),
   4881               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   4882                    Int64Regs:$g),
   4883               "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   4884               []>;
   4885 def SUST_B_2D_V4B8_TRAP
   4886   : NVPTXInst<(outs),
   4887               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4888                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4889               "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
   4890               "\\{$r, $g, $b, $a\\};",
   4891               []>;
   4892 def SUST_B_2D_V4B16_TRAP
   4893   : NVPTXInst<(outs),
   4894               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   4895                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4896              "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
   4897              "\\{$r, $g, $b, $a\\};",
   4898               []>;
   4899 def SUST_B_2D_V4B32_TRAP
   4900   : NVPTXInst<(outs),
   4901               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   4902                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4903              "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
   4904              "\\{$r, $g, $b, $a\\};",
   4905               []>;
   4906 
   4907 
   // sust.b.a2d.*.trap records: scalar b8/b16/b32/b64, v2 of each width, and
   // v4 for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), $idx, $x, $y (Int32Regs), then the
   // stored components $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for
   // b32, Int64Regs for b64).
   // The coordinate list is printed with four elements, {$idx, $x, $y, $y}:
   // $y is deliberately repeated in the last slot in every record of this
   // group.
   // NOTE(review): presumably PTX requires a four-element coordinate vector
   // for a2d and ignores the fourth element, so $y is only padding -- confirm
   // against the PTX `sust` documentation before changing it.
   4908 def SUST_B_2D_ARRAY_B8_TRAP
   4909   : NVPTXInst<(outs),
   4910               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4911                    Int16Regs:$r),
   4912               "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4913               []>;
   4914 def SUST_B_2D_ARRAY_B16_TRAP
   4915   : NVPTXInst<(outs),
   4916               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4917                    Int16Regs:$r),
   4918               "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4919               []>;
   4920 def SUST_B_2D_ARRAY_B32_TRAP
   4921   : NVPTXInst<(outs),
   4922               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4923                    Int32Regs:$r),
   4924               "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4925               []>;
   4926 def SUST_B_2D_ARRAY_B64_TRAP
   4927   : NVPTXInst<(outs),
   4928               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4929                    Int64Regs:$r),
   4930               "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   4931               []>;
   4932 def SUST_B_2D_ARRAY_V2B8_TRAP
   4933   : NVPTXInst<(outs),
   4934               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4935                    Int16Regs:$r, Int16Regs:$g),
   4936               "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4937               "\\{$r, $g\\};",
   4938               []>;
   4939 def SUST_B_2D_ARRAY_V2B16_TRAP
   4940   : NVPTXInst<(outs),
   4941               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4942                    Int16Regs:$r, Int16Regs:$g),
   4943              "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4944              "\\{$r, $g\\};",
   4945               []>;
   4946 def SUST_B_2D_ARRAY_V2B32_TRAP
   4947   : NVPTXInst<(outs),
   4948               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4949                    Int32Regs:$r, Int32Regs:$g),
   4950              "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4951              "\\{$r, $g\\};",
   4952               []>;
   4953 def SUST_B_2D_ARRAY_V2B64_TRAP
   4954   : NVPTXInst<(outs),
   4955               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4956                    Int64Regs:$r, Int64Regs:$g),
   4957              "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4958              "\\{$r, $g\\};",
   4959               []>;
   4960 def SUST_B_2D_ARRAY_V4B8_TRAP
   4961   : NVPTXInst<(outs),
   4962               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4963                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4964       "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4965       "\\{$r, $g, $b, $a\\};",
   4966               []>;
   4967 def SUST_B_2D_ARRAY_V4B16_TRAP
   4968   : NVPTXInst<(outs),
   4969               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4970                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   4971      "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4972      "\\{$r, $g, $b, $a\\};",
   4973               []>;
   4974 def SUST_B_2D_ARRAY_V4B32_TRAP
   4975   : NVPTXInst<(outs),
   4976               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   4977                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   4978      "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   4979      "\\{$r, $g, $b, $a\\};",
   4980               []>;
   4981 
   4982 
   // sust.b.3d.*.trap records: scalar b8/b16/b32/b64, v2 of each width, and v4
   // for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), $x, $y, $z (Int32Regs), then the stored
   // components $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for b32,
   // Int64Regs for b64).
   // The coordinate list is printed with four elements, {$x, $y, $z, $z}: $z is
   // deliberately repeated in the last slot in every record of this group.
   // NOTE(review): presumably PTX requires a four-element coordinate vector
   // for 3d and ignores the fourth element, so the second $z is only padding --
   // confirm against the PTX `sust` documentation before changing it.
   4983 def SUST_B_3D_B8_TRAP
   4984   : NVPTXInst<(outs),
   4985               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4986                    Int16Regs:$r),
   4987               "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4988               []>;
   4989 def SUST_B_3D_B16_TRAP
   4990   : NVPTXInst<(outs),
   4991               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4992                    Int16Regs:$r),
   4993               "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   4994               []>;
   4995 def SUST_B_3D_B32_TRAP
   4996   : NVPTXInst<(outs),
   4997               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   4998                    Int32Regs:$r),
   4999               "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5000               []>;
   5001 def SUST_B_3D_B64_TRAP
   5002   : NVPTXInst<(outs),
   5003               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5004                    Int64Regs:$r),
   5005               "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5006               []>;
   5007 def SUST_B_3D_V2B8_TRAP
   5008   : NVPTXInst<(outs),
   5009               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5010                    Int16Regs:$r, Int16Regs:$g),
   5011               "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5012               "\\{$r, $g\\};",
   5013               []>;
   5014 def SUST_B_3D_V2B16_TRAP
   5015   : NVPTXInst<(outs),
   5016               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5017                    Int16Regs:$r, Int16Regs:$g),
   5018               "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5019               "\\{$r, $g\\};",
   5020               []>;
   5021 def SUST_B_3D_V2B32_TRAP
   5022   : NVPTXInst<(outs),
   5023               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5024                    Int32Regs:$r, Int32Regs:$g),
   5025               "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5026               "\\{$r, $g\\};",
   5027               []>;
   5028 def SUST_B_3D_V2B64_TRAP
   5029   : NVPTXInst<(outs),
   5030               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5031                    Int64Regs:$r, Int64Regs:$g),
   5032               "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5033               "\\{$r, $g\\};",
   5034               []>;
   5035 def SUST_B_3D_V4B8_TRAP
   5036   : NVPTXInst<(outs),
   5037               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5038                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5039          "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5040          "\\{$r, $g, $b, $a\\};",
   5041               []>;
   5042 def SUST_B_3D_V4B16_TRAP
   5043   : NVPTXInst<(outs),
   5044               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5045                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5046         "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5047         "\\{$r, $g, $b, $a\\};",
   5048               []>;
   5049 def SUST_B_3D_V4B32_TRAP
   5050   : NVPTXInst<(outs),
   5051               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5052                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5053         "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5054         "\\{$r, $g, $b, $a\\};",
   5055               []>;
   5056 
   5057 
   5058 // .zero variant of the unformatted (sust.b) stores
   // sust.b.1d.*.zero records: scalar b8/b16/b32/b64, v2 of each width, and v4
   // for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), the single coordinate $x (Int32Regs),
   // printed as {$x}, then the stored components $r[, $g[, $b, $a]].
   // Components are Int16Regs for b8/b16, Int32Regs for b32, Int64Regs for b64.
   // NOTE(review): presumably $s is the surface handle and `.zero` selects the
   // PTX out-of-bounds mode of that name -- confirm against the PTX `sust`
   // documentation.
   5059 def SUST_B_1D_B8_ZERO
   5060   : NVPTXInst<(outs),
   5061               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5062               "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5063               []>;
   5064 def SUST_B_1D_B16_ZERO
   5065   : NVPTXInst<(outs),
   5066               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5067               "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5068               []>;
   5069 def SUST_B_1D_B32_ZERO
   5070   : NVPTXInst<(outs),
   5071               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5072               "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5073               []>;
   5074 def SUST_B_1D_B64_ZERO
   5075   : NVPTXInst<(outs),
   5076               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   5077               "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
   5078               []>;
   5079 def SUST_B_1D_V2B8_ZERO
   5080   : NVPTXInst<(outs),
   5081               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5082               "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5083               []>;
   5084 def SUST_B_1D_V2B16_ZERO
   5085   : NVPTXInst<(outs),
   5086               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5087               "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5088               []>;
   5089 def SUST_B_1D_V2B32_ZERO
   5090   : NVPTXInst<(outs),
   5091               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5092               "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5093               []>;
   5094 def SUST_B_1D_V2B64_ZERO
   5095   : NVPTXInst<(outs),
   5096               (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   5097               "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5098               []>;
   5099 def SUST_B_1D_V4B8_ZERO
   5100   : NVPTXInst<(outs),
   5101               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5102                    Int16Regs:$b, Int16Regs:$a),
   5103               "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5104               []>;
   5105 def SUST_B_1D_V4B16_ZERO
   5106   : NVPTXInst<(outs),
   5107               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5108                    Int16Regs:$b, Int16Regs:$a),
   5109               "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5110               []>;
   5111 def SUST_B_1D_V4B32_ZERO
   5112   : NVPTXInst<(outs),
   5113               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   5114                    Int32Regs:$b, Int32Regs:$a),
   5115               "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5116               []>;
   5117 
   5118 
   // sust.b.a1d.*.zero records: scalar b8/b16/b32/b64, v2 of each width, and
   // v4 for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), then $idx and $x (Int32Regs), printed as
   // the coordinate pair {$idx, $x}, then the stored components
   // $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for b32, Int64Regs
   // for b64).
   // NOTE(review): presumably $s is the surface handle and $idx the array
   // layer -- confirm against the PTX `sust` documentation.
   5119 def SUST_B_1D_ARRAY_B8_ZERO
   5120   : NVPTXInst<(outs),
   5121               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5122               "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5123               []>;
   5124 def SUST_B_1D_ARRAY_B16_ZERO
   5125   : NVPTXInst<(outs),
   5126               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5127               "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5128               []>;
   5129 def SUST_B_1D_ARRAY_B32_ZERO
   5130   : NVPTXInst<(outs),
   5131               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   5132               "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5133               []>;
   5134 def SUST_B_1D_ARRAY_B64_ZERO
   5135   : NVPTXInst<(outs),
   5136               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
   5137               "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5138               []>;
   5139 def SUST_B_1D_ARRAY_V2B8_ZERO
   5140   : NVPTXInst<(outs),
   5141               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5142                    Int16Regs:$g),
   5143               "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5144               []>;
   5145 def SUST_B_1D_ARRAY_V2B16_ZERO
   5146   : NVPTXInst<(outs),
   5147               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5148                    Int16Regs:$g),
   5149               "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5150               []>;
   5151 def SUST_B_1D_ARRAY_V2B32_ZERO
   5152   : NVPTXInst<(outs),
   5153               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5154                    Int32Regs:$g),
   5155               "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5156               []>;
   5157 def SUST_B_1D_ARRAY_V2B64_ZERO
   5158   : NVPTXInst<(outs),
   5159               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
   5160                    Int64Regs:$g),
   5161               "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5162               []>;
   5163 def SUST_B_1D_ARRAY_V4B8_ZERO
   5164   : NVPTXInst<(outs),
   5165               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5166                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5167               "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
   5168               "\\{$r, $g, $b, $a\\};",
   5169               []>;
   5170 def SUST_B_1D_ARRAY_V4B16_ZERO
   5171   : NVPTXInst<(outs),
   5172               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5173                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5174              "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
   5175              "\\{$r, $g, $b, $a\\};",
   5176               []>;
   5177 def SUST_B_1D_ARRAY_V4B32_ZERO
   5178   : NVPTXInst<(outs),
   5179               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5180                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5181              "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
   5182              "\\{$r, $g, $b, $a\\};",
   5183               []>;
   5184 
   5185 
   // sust.b.2d.*.zero records: scalar b8/b16/b32/b64, v2 of each width, and v4
   // for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), the coordinates $x and $y (Int32Regs),
   // printed as {$x, $y}, then the stored components $r[, $g[, $b, $a]].
   // Components are Int16Regs for b8/b16, Int32Regs for b32, Int64Regs for b64.
   // NOTE(review): presumably $s is the surface handle -- confirm against the
   // PTX `sust` documentation.
   5186 def SUST_B_2D_B8_ZERO
   5187   : NVPTXInst<(outs),
   5188               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5189               "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5190               []>;
   5191 def SUST_B_2D_B16_ZERO
   5192   : NVPTXInst<(outs),
   5193               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5194               "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5195               []>;
   5196 def SUST_B_2D_B32_ZERO
   5197   : NVPTXInst<(outs),
   5198               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5199               "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5200               []>;
   5201 def SUST_B_2D_B64_ZERO
   5202   : NVPTXInst<(outs),
   5203               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   5204               "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5205               []>;
   5206 def SUST_B_2D_V2B8_ZERO
   5207   : NVPTXInst<(outs),
   5208               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5209                    Int16Regs:$g),
   5210               "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5211               []>;
   5212 def SUST_B_2D_V2B16_ZERO
   5213   : NVPTXInst<(outs),
   5214               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5215                    Int16Regs:$g),
   5216               "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5217               []>;
   5218 def SUST_B_2D_V2B32_ZERO
   5219   : NVPTXInst<(outs),
   5220               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5221                    Int32Regs:$g),
   5222               "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5223               []>;
   5224 def SUST_B_2D_V2B64_ZERO
   5225   : NVPTXInst<(outs),
   5226               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   5227                    Int64Regs:$g),
   5228               "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5229               []>;
   5230 def SUST_B_2D_V4B8_ZERO
   5231   : NVPTXInst<(outs),
   5232               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5233                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5234               "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
   5235               "\\{$r, $g, $b, $a\\};",
   5236               []>;
   5237 def SUST_B_2D_V4B16_ZERO
   5238   : NVPTXInst<(outs),
   5239               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5240                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5241              "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
   5242              "\\{$r, $g, $b, $a\\};",
   5243               []>;
   5244 def SUST_B_2D_V4B32_ZERO
   5245   : NVPTXInst<(outs),
   5246               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5247                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5248              "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
   5249              "\\{$r, $g, $b, $a\\};",
   5250               []>;
   5251 
   5252 
   // sust.b.a2d.*.zero records: scalar b8/b16/b32/b64, v2 of each width, and
   // v4 for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), $idx, $x, $y (Int32Regs), then the
   // stored components $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for
   // b32, Int64Regs for b64).
   // The coordinate list is printed with four elements, {$idx, $x, $y, $y}:
   // $y is deliberately repeated in the last slot in every record of this
   // group.
   // NOTE(review): presumably PTX requires a four-element coordinate vector
   // for a2d and ignores the fourth element, so the second $y is only padding
   // -- confirm against the PTX `sust` documentation before changing it.
   5253 def SUST_B_2D_ARRAY_B8_ZERO
   5254   : NVPTXInst<(outs),
   5255               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5256                    Int16Regs:$r),
   5257               "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5258               []>;
   5259 def SUST_B_2D_ARRAY_B16_ZERO
   5260   : NVPTXInst<(outs),
   5261               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5262                    Int16Regs:$r),
   5263               "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5264               []>;
   5265 def SUST_B_2D_ARRAY_B32_ZERO
   5266   : NVPTXInst<(outs),
   5267               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5268                    Int32Regs:$r),
   5269               "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5270               []>;
   5271 def SUST_B_2D_ARRAY_B64_ZERO
   5272   : NVPTXInst<(outs),
   5273               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5274                    Int64Regs:$r),
   5275               "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5276               []>;
   5277 def SUST_B_2D_ARRAY_V2B8_ZERO
   5278   : NVPTXInst<(outs),
   5279               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5280                    Int16Regs:$r, Int16Regs:$g),
   5281               "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5282               "\\{$r, $g\\};",
   5283               []>;
   5284 def SUST_B_2D_ARRAY_V2B16_ZERO
   5285   : NVPTXInst<(outs),
   5286               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5287                    Int16Regs:$r, Int16Regs:$g),
   5288              "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5289              "\\{$r, $g\\};",
   5290               []>;
   5291 def SUST_B_2D_ARRAY_V2B32_ZERO
   5292   : NVPTXInst<(outs),
   5293               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5294                    Int32Regs:$r, Int32Regs:$g),
   5295              "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5296              "\\{$r, $g\\};",
   5297               []>;
   5298 def SUST_B_2D_ARRAY_V2B64_ZERO
   5299   : NVPTXInst<(outs),
   5300               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5301                    Int64Regs:$r, Int64Regs:$g),
   5302              "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5303              "\\{$r, $g\\};",
   5304               []>;
   5305 def SUST_B_2D_ARRAY_V4B8_ZERO
   5306   : NVPTXInst<(outs),
   5307               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5308                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5309       "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5310       "\\{$r, $g, $b, $a\\};",
   5311               []>;
   5312 def SUST_B_2D_ARRAY_V4B16_ZERO
   5313   : NVPTXInst<(outs),
   5314               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5315                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5316      "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5317      "\\{$r, $g, $b, $a\\};",
   5318               []>;
   5319 def SUST_B_2D_ARRAY_V4B32_ZERO
   5320   : NVPTXInst<(outs),
   5321               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5322                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5323      "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
   5324      "\\{$r, $g, $b, $a\\};",
   5325               []>;
   5326 
   5327 
   // sust.b.3d.*.zero records: scalar b8/b16/b32/b64, v2 of each width, and v4
   // for b8/b16/b32 (this group defines no v4.b64 record).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), $x, $y, $z (Int32Regs), then the stored
   // components $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for b32,
   // Int64Regs for b64).
   // The coordinate list is printed with four elements, {$x, $y, $z, $z}: $z is
   // deliberately repeated in the last slot in every record of this group.
   // NOTE(review): presumably PTX requires a four-element coordinate vector
   // for 3d and ignores the fourth element, so the second $z is only padding --
   // confirm against the PTX `sust` documentation before changing it.
   5328 def SUST_B_3D_B8_ZERO
   5329   : NVPTXInst<(outs),
   5330               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5331                    Int16Regs:$r),
   5332               "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5333               []>;
   5334 def SUST_B_3D_B16_ZERO
   5335   : NVPTXInst<(outs),
   5336               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5337                    Int16Regs:$r),
   5338               "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5339               []>;
   5340 def SUST_B_3D_B32_ZERO
   5341   : NVPTXInst<(outs),
   5342               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5343                    Int32Regs:$r),
   5344               "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5345               []>;
   5346 def SUST_B_3D_B64_ZERO
   5347   : NVPTXInst<(outs),
   5348               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5349                    Int64Regs:$r),
   5350               "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5351               []>;
   5352 def SUST_B_3D_V2B8_ZERO
   5353   : NVPTXInst<(outs),
   5354               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5355                    Int16Regs:$r, Int16Regs:$g),
   5356               "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5357               "\\{$r, $g\\};",
   5358               []>;
   5359 def SUST_B_3D_V2B16_ZERO
   5360   : NVPTXInst<(outs),
   5361               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5362                    Int16Regs:$r, Int16Regs:$g),
   5363               "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5364               "\\{$r, $g\\};",
   5365               []>;
   5366 def SUST_B_3D_V2B32_ZERO
   5367   : NVPTXInst<(outs),
   5368               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5369                    Int32Regs:$r, Int32Regs:$g),
   5370               "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5371               "\\{$r, $g\\};",
   5372               []>;
   5373 def SUST_B_3D_V2B64_ZERO
   5374   : NVPTXInst<(outs),
   5375               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5376                    Int64Regs:$r, Int64Regs:$g),
   5377               "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5378               "\\{$r, $g\\};",
   5379               []>;
   5380 def SUST_B_3D_V4B8_ZERO
   5381   : NVPTXInst<(outs),
   5382               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5383                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5384          "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5385          "\\{$r, $g, $b, $a\\};",
   5386               []>;
   5387 def SUST_B_3D_V4B16_ZERO
   5388   : NVPTXInst<(outs),
   5389               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5390                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5391         "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5392         "\\{$r, $g, $b, $a\\};",
   5393               []>;
   5394 def SUST_B_3D_V4B32_ZERO
   5395   : NVPTXInst<(outs),
   5396               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5397                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5398         "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
   5399         "\\{$r, $g, $b, $a\\};",
   5400               []>;
   5401 
   5402 
   5403 
   5404 // Formatted (sust.p) stores
   5405 
   // sust.p.1d.*.trap records: scalar, v2 and v4 forms for b8/b16/b32 only
   // (this group defines no b64 record of any vector width).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), the single coordinate $x (Int32Regs),
   // printed as {$x}, then the stored components $r[, $g[, $b, $a]].
   // Components are Int16Regs for b8/b16 and Int32Regs for b32.
   // NOTE(review): presumably $s is the surface handle -- confirm against the
   // PTX `sust` documentation.
   5406 def SUST_P_1D_B8_TRAP
   5407   : NVPTXInst<(outs),
   5408               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5409               "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5410               []>;
   5411 def SUST_P_1D_B16_TRAP
   5412   : NVPTXInst<(outs),
   5413               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5414               "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5415               []>;
   5416 def SUST_P_1D_B32_TRAP
   5417   : NVPTXInst<(outs),
   5418               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5419               "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
   5420               []>;
   5421 def SUST_P_1D_V2B8_TRAP
   5422   : NVPTXInst<(outs),
   5423               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5424               "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5425               []>;
   5426 def SUST_P_1D_V2B16_TRAP
   5427   : NVPTXInst<(outs),
   5428               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5429               "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5430               []>;
   5431 def SUST_P_1D_V2B32_TRAP
   5432   : NVPTXInst<(outs),
   5433               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5434               "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
   5435               []>;
   5436 def SUST_P_1D_V4B8_TRAP
   5437   : NVPTXInst<(outs),
   5438               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5439                    Int16Regs:$b, Int16Regs:$a),
   5440               "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5441               []>;
   5442 def SUST_P_1D_V4B16_TRAP
   5443   : NVPTXInst<(outs),
   5444               (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
   5445                    Int16Regs:$b, Int16Regs:$a),
   5446               "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5447               []>;
   5448 def SUST_P_1D_V4B32_TRAP
   5449   : NVPTXInst<(outs),
   5450               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
   5451                    Int32Regs:$b, Int32Regs:$a),
   5452               "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
   5453               []>;
   5454 
   5455 
   // sust.p.a1d.*.trap records: scalar, v2 and v4 forms for b8/b16/b32 only
   // (this group defines no b64 record of any vector width).
   // Every record has an empty (outs) list and an empty pattern list ([]).
   // Inputs, in order: $s (Int64Regs), then $idx and $x (Int32Regs), printed as
   // the coordinate pair {$idx, $x}, then the stored components
   // $r[, $g[, $b, $a]] (Int16Regs for b8/b16, Int32Regs for b32).
   // NOTE(review): presumably $s is the surface handle and $idx the array
   // layer -- confirm against the PTX `sust` documentation.
   5456 def SUST_P_1D_ARRAY_B8_TRAP
   5457   : NVPTXInst<(outs),
   5458               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5459               "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5460               []>;
   5461 def SUST_P_1D_ARRAY_B16_TRAP
   5462   : NVPTXInst<(outs),
   5463               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
   5464               "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5465               []>;
   5466 def SUST_P_1D_ARRAY_B32_TRAP
   5467   : NVPTXInst<(outs),
   5468               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
   5469               "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
   5470               []>;
   5471 def SUST_P_1D_ARRAY_V2B8_TRAP
   5472   : NVPTXInst<(outs),
   5473               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5474                    Int16Regs:$g),
   5475               "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5476               []>;
   5477 def SUST_P_1D_ARRAY_V2B16_TRAP
   5478   : NVPTXInst<(outs),
   5479               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5480                    Int16Regs:$g),
   5481               "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5482               []>;
   5483 def SUST_P_1D_ARRAY_V2B32_TRAP
   5484   : NVPTXInst<(outs),
   5485               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5486                    Int32Regs:$g),
   5487               "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
   5488               []>;
   5489 def SUST_P_1D_ARRAY_V4B8_TRAP
   5490   : NVPTXInst<(outs),
   5491               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5492                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5493               "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
   5494               "\\{$r, $g, $b, $a\\};",
   5495               []>;
   5496 def SUST_P_1D_ARRAY_V4B16_TRAP
   5497   : NVPTXInst<(outs),
   5498               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
   5499                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5500              "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
   5501              "\\{$r, $g, $b, $a\\};",
   5502               []>;
   5503 def SUST_P_1D_ARRAY_V4B32_TRAP
   5504   : NVPTXInst<(outs),
   5505               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
   5506                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5507              "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
   5508              "\\{$r, $g, $b, $a\\};",
   5509               []>;
   5510 
   5511 // SUST_P_2D_*_TRAP: records printing "sust.p.2d[.v2|.v4].b<N>.trap".
        // Inputs are $s (presumably the surface handle -- confirm), the coordinate
        // pair {$x, $y}, then 1, 2 or 4 data registers ($r, $g, $b, $a).
        // B8 and B16 forms both take Int16Regs data; B32 forms take Int32Regs.
        // Every record has no outputs and an empty pattern list ([]).
   5512 def SUST_P_2D_B8_TRAP
   5513   : NVPTXInst<(outs),
   5514               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5515               "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5516               []>;
   5517 def SUST_P_2D_B16_TRAP
   5518   : NVPTXInst<(outs),
   5519               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5520               "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5521               []>;
   5522 def SUST_P_2D_B32_TRAP
   5523   : NVPTXInst<(outs),
   5524               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5525               "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
   5526               []>;
   5527 def SUST_P_2D_V2B8_TRAP
   5528   : NVPTXInst<(outs),
   5529               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5530                    Int16Regs:$g),
   5531               "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5532               []>;
   5533 def SUST_P_2D_V2B16_TRAP
   5534   : NVPTXInst<(outs),
   5535               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5536                    Int16Regs:$g),
   5537               "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5538               []>;
   5539 def SUST_P_2D_V2B32_TRAP
   5540   : NVPTXInst<(outs),
   5541               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5542                    Int32Regs:$g),
   5543               "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
   5544               []>;
   5545 def SUST_P_2D_V4B8_TRAP
   5546   : NVPTXInst<(outs),
   5547               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5548                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5549               "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
   5550               "\\{$r, $g, $b, $a\\};",
   5551               []>;
   5552 def SUST_P_2D_V4B16_TRAP
   5553   : NVPTXInst<(outs),
   5554               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
   5555                    Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5556              "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
   5557              "\\{$r, $g, $b, $a\\};",
   5558               []>;
   5559 def SUST_P_2D_V4B32_TRAP
   5560   : NVPTXInst<(outs),
   5561               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5562                    Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5563              "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
   5564              "\\{$r, $g, $b, $a\\};",
   5565               []>;
   5566 
   5567 // SUST_P_2D_ARRAY_*_TRAP: records printing "sust.p.a2d[.v2|.v4].b<N>.trap".
        // Inputs are $s (presumably the surface handle -- confirm), $idx, $x, $y,
        // then 1, 2 or 4 data registers ($r, $g, $b, $a).
        // NOTE: the coordinate vector is printed as {$idx, $x, $y, $y}; $y is
        // repeated on purpose so that four elements are emitted. Presumably PTX
        // wants a 4-element vector here and ignores the last slot -- confirm
        // against the PTX ISA before "fixing" the duplicate.
        // B8 and B16 forms both take Int16Regs data; B32 forms take Int32Regs.
        // Every record has no outputs and an empty pattern list ([]).
   5568 def SUST_P_2D_ARRAY_B8_TRAP
   5569   : NVPTXInst<(outs),
   5570               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5571                    Int16Regs:$r),
   5572               "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5573               []>;
   5574 def SUST_P_2D_ARRAY_B16_TRAP
   5575   : NVPTXInst<(outs),
   5576               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5577                    Int16Regs:$r),
   5578               "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5579               []>;
   5580 def SUST_P_2D_ARRAY_B32_TRAP
   5581   : NVPTXInst<(outs),
   5582               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5583                    Int32Regs:$r),
   5584               "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
   5585               []>;
   5586 def SUST_P_2D_ARRAY_V2B8_TRAP
   5587   : NVPTXInst<(outs),
   5588               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5589                    Int16Regs:$r, Int16Regs:$g),
   5590               "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5591               "\\{$r, $g\\};",
   5592               []>;
   5593 def SUST_P_2D_ARRAY_V2B16_TRAP
   5594   : NVPTXInst<(outs),
   5595               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5596                    Int16Regs:$r, Int16Regs:$g),
   5597              "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5598              "\\{$r, $g\\};",
   5599               []>;
   5600 def SUST_P_2D_ARRAY_V2B32_TRAP
   5601   : NVPTXInst<(outs),
   5602               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5603                    Int32Regs:$r, Int32Regs:$g),
   5604              "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5605              "\\{$r, $g\\};",
   5606               []>;
   5607 def SUST_P_2D_ARRAY_V4B8_TRAP
   5608   : NVPTXInst<(outs),
   5609               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5610                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5611       "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5612       "\\{$r, $g, $b, $a\\};",
   5613               []>;
   5614 def SUST_P_2D_ARRAY_V4B16_TRAP
   5615   : NVPTXInst<(outs),
   5616               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5617                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5618      "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5619      "\\{$r, $g, $b, $a\\};",
   5620               []>;
   5621 def SUST_P_2D_ARRAY_V4B32_TRAP
   5622   : NVPTXInst<(outs),
   5623               (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
   5624                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5625      "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
   5626      "\\{$r, $g, $b, $a\\};",
   5627               []>;
   5628 
   5629 // SUST_P_3D_*_TRAP: records printing "sust.p.3d[.v2|.v4].b<N>.trap".
        // Inputs are $s (presumably the surface handle -- confirm), $x, $y, $z,
        // then 1, 2 or 4 data registers ($r, $g, $b, $a).
        // NOTE: the coordinate vector is printed as {$x, $y, $z, $z}; $z is
        // repeated on purpose so that four elements are emitted. Presumably PTX
        // wants a 4-element vector here and ignores the last slot -- confirm
        // against the PTX ISA before "fixing" the duplicate.
        // B8 and B16 forms both take Int16Regs data; B32 forms take Int32Regs.
        // Every record has no outputs and an empty pattern list ([]).
   5630 def SUST_P_3D_B8_TRAP
   5631   : NVPTXInst<(outs),
   5632               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5633                    Int16Regs:$r),
   5634               "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5635               []>;
   5636 def SUST_P_3D_B16_TRAP
   5637   : NVPTXInst<(outs),
   5638               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5639                    Int16Regs:$r),
   5640               "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5641               []>;
   5642 def SUST_P_3D_B32_TRAP
   5643   : NVPTXInst<(outs),
   5644               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5645                    Int32Regs:$r),
   5646               "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
   5647               []>;
   5648 def SUST_P_3D_V2B8_TRAP
   5649   : NVPTXInst<(outs),
   5650               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5651                    Int16Regs:$r, Int16Regs:$g),
   5652               "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5653               "\\{$r, $g\\};",
   5654               []>;
   5655 def SUST_P_3D_V2B16_TRAP
   5656   : NVPTXInst<(outs),
   5657               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5658                    Int16Regs:$r, Int16Regs:$g),
   5659               "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5660               "\\{$r, $g\\};",
   5661               []>;
   5662 def SUST_P_3D_V2B32_TRAP
   5663   : NVPTXInst<(outs),
   5664               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5665                    Int32Regs:$r, Int32Regs:$g),
   5666               "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5667               "\\{$r, $g\\};",
   5668               []>;
   5669 def SUST_P_3D_V4B8_TRAP
   5670   : NVPTXInst<(outs),
   5671               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5672                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5673          "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5674          "\\{$r, $g, $b, $a\\};",
   5675               []>;
   5676 def SUST_P_3D_V4B16_TRAP
   5677   : NVPTXInst<(outs),
   5678               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5679                    Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5680         "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5681         "\\{$r, $g, $b, $a\\};",
   5682               []>;
   5683 def SUST_P_3D_V4B32_TRAP
   5684   : NVPTXInst<(outs),
   5685               (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5686                    Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5687         "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
   5688         "\\{$r, $g, $b, $a\\};",
   5689               []>;
   5690 }
   5691 
   5692 // Surface store instruction patterns
   5693 // These patterns are kept separate from the instruction definitions because
   5694 // TableGen reports type errors when they are written inline (cause unknown).
   5695 
   5696 // .clamp variant
        // 1D group: each Pat rewrites an int_nvvm_sust_b_1d_<ty>_clamp intrinsic
        // into the matching SUST_B_1D_<TY>_CLAMP instruction, forwarding the
        // operands unchanged and in the same order: $s, coordinate $x, then
        // 1, 2 or 4 data registers.
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses
        // Int64Regs. There is no v4i64 form in this group.
   5697 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
   5698            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5699           (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   5700 
   5701 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
   5702            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   5703           (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   5704 
   5705 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
   5706            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   5707           (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   5708 
   5709 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
   5710            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   5711           (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   5712 
   5713 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
   5714            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5715           (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
   5716            Int16Regs:$r, Int16Regs:$g)>;
   5717 
   5718 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
   5719            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5720           (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
   5721            Int16Regs:$r, Int16Regs:$g)>;
   5722 
   5723 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
   5724            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5725           (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
   5726            Int32Regs:$r, Int32Regs:$g)>;
   5727 
   5728 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
   5729            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   5730           (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
   5731            Int64Regs:$r, Int64Regs:$g)>;
   5732 
   5733 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
   5734            Int64Regs:$s, Int32Regs:$x,
   5735            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5736           (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
   5737            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5738 
   5739 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
   5740            Int64Regs:$s, Int32Regs:$x,
   5741            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5742           (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
   5743            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5744 
   5745 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
   5746            Int64Regs:$s, Int32Regs:$x,
   5747            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5748           (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
   5749            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   5750 
   5751 
   5752 // 1D-array .clamp patterns: each Pat rewrites an
        // int_nvvm_sust_b_1d_array_<ty>_clamp intrinsic into the matching
        // SUST_B_1D_ARRAY_<TY>_CLAMP instruction, forwarding the operands unchanged
        // and in order: $s, $l (presumably the array layer -- confirm), $x, then
        // 1, 2 or 4 data registers. i8 and i16 data both use Int16Regs.
   5753 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
   5754            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   5755           (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5756            Int16Regs:$r)>;
   5757 
   5758 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
   5759            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   5760           (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5761            Int16Regs:$r)>;
   5762 
   5763 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
   5764            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   5765           (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5766            Int32Regs:$r)>;
   5767 
   5768 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
   5769            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   5770           (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5771            Int64Regs:$r)>;
   5772 
   5773 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
   5774           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5775           (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5776            Int16Regs:$r, Int16Regs:$g)>;
   5777 
   5778 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
   5779           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   5780           (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5781            Int16Regs:$r, Int16Regs:$g)>;
   5782 
   5783 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
   5784           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   5785           (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5786            Int32Regs:$r, Int32Regs:$g)>;
   5787 
   5788 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
   5789           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   5790           (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5791            Int64Regs:$r, Int64Regs:$g)>;
   5792 
   5793 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
   5794            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5795            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5796           (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5797            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5798 
   5799 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
   5800            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5801            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5802           (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5803            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5804 
   5805 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
   5806            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5807            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5808           (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   5809            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   5810 
   5811 
   5812 // 2D .clamp patterns: each Pat rewrites an int_nvvm_sust_b_2d_<ty>_clamp
        // intrinsic into the matching SUST_B_2D_<TY>_CLAMP instruction, forwarding
        // the operands unchanged and in order: $s, coordinates $x and $y, then
        // 1, 2 or 4 data registers. i8 and i16 data both use Int16Regs.
   5813 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
   5814            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5815           (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5816            Int16Regs:$r)>;
   5817 
   5818 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
   5819            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5820           (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5821            Int16Regs:$r)>;
   5822 
   5823 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
   5824            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5825           (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5826            Int32Regs:$r)>;
   5827 
   5828 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
   5829            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   5830           (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5831            Int64Regs:$r)>;
   5832 
   5833 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
   5834           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   5835           (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5836            Int16Regs:$r, Int16Regs:$g)>;
   5837 
   5838 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
   5839           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   5840           (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5841            Int16Regs:$r, Int16Regs:$g)>;
   5842 
   5843 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
   5844           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   5845           (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5846            Int32Regs:$r, Int32Regs:$g)>;
   5847 
   5848 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
   5849           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   5850           (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5851            Int64Regs:$r, Int64Regs:$g)>;
   5852 
   5853 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
   5854            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5855            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5856           (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5857            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5858 
   5859 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
   5860            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5861            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5862           (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5863            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5864 
   5865 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
   5866            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5867            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5868           (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   5869            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   5870 
   5871 
   5872 // 2D-array .clamp patterns: each Pat rewrites an
        // int_nvvm_sust_b_2d_array_<ty>_clamp intrinsic into the matching
        // SUST_B_2D_ARRAY_<TY>_CLAMP instruction, forwarding the operands unchanged
        // and in order: $s, $l (presumably the array layer -- confirm), $x, $y,
        // then 1, 2 or 4 data registers. i8 and i16 data both use Int16Regs.
   5873 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
   5874           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5875           (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
   5876            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5877            Int16Regs:$r)>;
   5878 
   5879 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
   5880           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   5881           (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
   5882            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5883            Int16Regs:$r)>;
   5884 
   5885 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
   5886           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   5887           (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
   5888            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5889            Int32Regs:$r)>;
   5890 
   5891 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
   5892           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   5893           (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
   5894            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5895            Int64Regs:$r)>;
   5896 
   5897 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
   5898            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5899            Int16Regs:$r, Int16Regs:$g),
   5900           (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
   5901            Int32Regs:$x, Int32Regs:$y,
   5902            Int16Regs:$r, Int16Regs:$g)>;
   5903 
   5904 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
   5905            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5906            Int16Regs:$r, Int16Regs:$g),
   5907           (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
   5908            Int32Regs:$x, Int32Regs:$y,
   5909            Int16Regs:$r, Int16Regs:$g)>;
   5910 
   5911 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
   5912            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   5913            Int32Regs:$g),
   5914           (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
   5915            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   5916 
   5917 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
   5918            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   5919            Int64Regs:$g),
   5920           (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
   5921            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   5922 
   5923 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
   5924            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5925            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5926           (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
   5927            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5928            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5929 
   5930 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
   5931            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5932            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   5933           (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
   5934            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5935            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   5936 
   5937 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
   5938            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   5939            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   5940           (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
   5941            Int32Regs:$x, Int32Regs:$y,
   5942            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   5943 
   5944 
   5945 // 3D .clamp patterns: each Pat rewrites an int_nvvm_sust_b_3d_<ty>_clamp
        // intrinsic into the matching SUST_B_3D_<TY>_CLAMP instruction, forwarding
        // the operands unchanged and in order: $s, coordinates $x, $y, $z, then
        // 1, 2 or 4 data registers. i8 and i16 data both use Int16Regs.
   5946 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
   5947            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5948            Int16Regs:$r),
   5949           (SUST_B_3D_B8_CLAMP Int64Regs:$s,
   5950            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5951            Int16Regs:$r)>;
   5952 
   5953 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
   5954            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5955            Int16Regs:$r),
   5956           (SUST_B_3D_B16_CLAMP Int64Regs:$s,
   5957            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5958            Int16Regs:$r)>;
   5959 
   5960 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
   5961            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5962            Int32Regs:$r),
   5963           (SUST_B_3D_B32_CLAMP Int64Regs:$s,
   5964            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5965            Int32Regs:$r)>;
   5966 
   5967 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
   5968            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5969            Int64Regs:$r),
   5970           (SUST_B_3D_B64_CLAMP Int64Regs:$s,
   5971            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5972            Int64Regs:$r)>;
   5973 
   5974 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
   5975            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5976            Int16Regs:$r, Int16Regs:$g),
   5977           (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
   5978            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5979            Int16Regs:$r, Int16Regs:$g)>;
   5980 
   5981 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
   5982            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5983            Int16Regs:$r, Int16Regs:$g),
   5984           (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
   5985            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5986            Int16Regs:$r, Int16Regs:$g)>;
   5987 
   5988 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
   5989            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5990            Int32Regs:$r, Int32Regs:$g),
   5991           (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
   5992            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5993            Int32Regs:$r, Int32Regs:$g)>;
   5994 
   5995 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
   5996            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   5997            Int64Regs:$r, Int64Regs:$g),
   5998           (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
   5999            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6000            Int64Regs:$r, Int64Regs:$g)>;
   6001 
   6002 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
   6003            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6004            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6005           (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
   6006            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6007            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6008 
   6009 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
   6010            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6011            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6012           (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
   6013            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6014            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6015 
   6016 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
   6017            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6018            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6019           (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
   6020            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6021            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6022 
   6023 
   6024 // .trap variant
        // 1D group: each Pat rewrites an int_nvvm_sust_b_1d_<ty>_trap intrinsic
        // into the matching SUST_B_1D_<TY>_TRAP instruction, forwarding the
        // operands unchanged and in the same order: $s, coordinate $x, then
        // 1, 2 or 4 data registers.
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs; i64 uses
        // Int64Regs. There is no v4i64 form in this group.
   6025 def : Pat<(int_nvvm_sust_b_1d_i8_trap
   6026            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6027           (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6028 
   6029 def : Pat<(int_nvvm_sust_b_1d_i16_trap
   6030            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6031           (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6032 
   6033 def : Pat<(int_nvvm_sust_b_1d_i32_trap
   6034            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6035           (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6036 
   6037 def : Pat<(int_nvvm_sust_b_1d_i64_trap
   6038            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   6039           (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   6040 
   6041 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
   6042            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6043           (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6044            Int16Regs:$r, Int16Regs:$g)>;
   6045 
   6046 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
   6047            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6048           (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6049            Int16Regs:$r, Int16Regs:$g)>;
   6050 
   6051 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
   6052            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6053           (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6054            Int32Regs:$r, Int32Regs:$g)>;
   6055 
   6056 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
   6057            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6058           (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
   6059            Int64Regs:$r, Int64Regs:$g)>;
   6060 
   6061 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
   6062            Int64Regs:$s, Int32Regs:$x,
   6063            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6064           (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6065            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6066 
   6067 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
   6068            Int64Regs:$s, Int32Regs:$x,
   6069            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6070           (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6071            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6072 
   6073 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
   6074            Int64Regs:$s, Int32Regs:$x,
   6075            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6076           (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6077            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6078 
   6079 
   6080 // 1D-array .trap patterns: each Pat rewrites an
        // int_nvvm_sust_b_1d_array_<ty>_trap intrinsic into the matching
        // SUST_B_1D_ARRAY_<TY>_TRAP instruction, forwarding the operands unchanged
        // and in order: $s, $l (presumably the array layer -- confirm), $x, then
        // 1, 2 or 4 data registers. i8 and i16 data both use Int16Regs.
   6081 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
   6082            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6083           (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6084            Int16Regs:$r)>;
   6085 
   6086 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
   6087            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6088           (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6089            Int16Regs:$r)>;
   6090 
   6091 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
   6092            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6093           (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6094            Int32Regs:$r)>;
   6095 
   6096 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
   6097            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   6098           (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6099            Int64Regs:$r)>;
   6100 
   6101 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
   6102           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6103           (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6104            Int16Regs:$r, Int16Regs:$g)>;
   6105 
   6106 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
   6107           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6108           (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6109            Int16Regs:$r, Int16Regs:$g)>;
   6110 
   6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
   6112           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6113           (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6114            Int32Regs:$r, Int32Regs:$g)>;
   6115 
   6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
   6117           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6118           (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6119            Int64Regs:$r, Int64Regs:$g)>;
   6120 
   6121 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
   6122            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6123            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6124           (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6125            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6126 
   6127 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
   6128            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6129            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6130           (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6131            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6132 
   6133 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
   6134            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6135            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6136           (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6137            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6138 
   6139 
   6140 // sust_b, 2D, _trap: every int_nvvm_sust_b_2d_*_trap intrinsic below selects
        // the matching SUST_B_2D_*_TRAP instruction.  Operands are forwarded
        // unchanged and in order: $s (Int64Regs), $x and $y (Int32Regs), then one,
        // two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$x/$y are presumably surface handle and x/y
        // coordinates -- confirm against the SUST_* instruction definitions.
   6141 def : Pat<(int_nvvm_sust_b_2d_i8_trap
   6142            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6143           (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6144            Int16Regs:$r)>;
   6145 
   6146 def : Pat<(int_nvvm_sust_b_2d_i16_trap
   6147            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6148           (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6149            Int16Regs:$r)>;
   6150 
   6151 def : Pat<(int_nvvm_sust_b_2d_i32_trap
   6152            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6153           (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6154            Int32Regs:$r)>;
   6155 
   6156 def : Pat<(int_nvvm_sust_b_2d_i64_trap
   6157            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6158           (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6159            Int64Regs:$r)>;
   6160 
   6161 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
   6162           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6163           (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6164            Int16Regs:$r, Int16Regs:$g)>;
   6165 
   6166 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
   6167           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6168           (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6169            Int16Regs:$r, Int16Regs:$g)>;
   6170 
   6171 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
   6172           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6173           (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6174            Int32Regs:$r, Int32Regs:$g)>;
   6175 
   6176 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
   6177           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   6178           (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6179            Int64Regs:$r, Int64Regs:$g)>;
   6180 
   6181 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
   6182            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6183            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6184           (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6185            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6186 
   6187 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
   6188            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6189            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6190           (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6191            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6192 
   6193 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
   6194            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6195            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6196           (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6197            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6198 
   6199 
   6200 // sust_b, 2D array, _trap: every int_nvvm_sust_b_2d_array_*_trap intrinsic
        // below selects the matching SUST_B_2D_ARRAY_*_TRAP instruction.  Operands
        // are forwarded unchanged and in order: $s (Int64Regs), $l, $x and $y
        // (Int32Regs), then one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$l/$x/$y are presumably surface handle, array layer
        // and x/y coordinates -- confirm against the SUST_* instruction
        // definitions.
   6201 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
   6202           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6203           (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
   6204            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6205            Int16Regs:$r)>;
   6206 
   6207 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
   6208           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6209           (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
   6210            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6211            Int16Regs:$r)>;
   6212 
   6213 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
   6214           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6215           (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
   6216            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6217            Int32Regs:$r)>;
   6218 
   6219 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
   6220           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6221           (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
   6222            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6223            Int64Regs:$r)>;
   6224 
   6225 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
   6226            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6227            Int16Regs:$r, Int16Regs:$g),
   6228           (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
   6229            Int32Regs:$x, Int32Regs:$y,
   6230            Int16Regs:$r, Int16Regs:$g)>;
   6231 
   6232 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
   6233            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6234            Int16Regs:$r, Int16Regs:$g),
   6235           (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
   6236            Int32Regs:$x, Int32Regs:$y,
   6237            Int16Regs:$r, Int16Regs:$g)>;
   6238 
   6239 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
   6240            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6241            Int32Regs:$g),
   6242           (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6243            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6244 
   6245 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
   6246            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   6247            Int64Regs:$g),
   6248           (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
   6249            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   6250 
   6251 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
   6252            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6253            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6254           (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
   6255            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6256            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6257 
   6258 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
   6259            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6260            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6261           (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
   6262            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6263            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6264 
   6265 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
   6266            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6267            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6268           (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6269            Int32Regs:$x, Int32Regs:$y,
   6270            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6271 
   6272 
   6273 // sust_b, 3D, _trap: every int_nvvm_sust_b_3d_*_trap intrinsic below selects
        // the matching SUST_B_3D_*_TRAP instruction.  Operands are forwarded
        // unchanged and in order: $s (Int64Regs), $x, $y and $z (Int32Regs), then
        // one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$x/$y/$z are presumably surface handle and x/y/z
        // coordinates -- confirm against the SUST_* instruction definitions.
   6274 def : Pat<(int_nvvm_sust_b_3d_i8_trap
   6275            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6276            Int16Regs:$r),
   6277           (SUST_B_3D_B8_TRAP Int64Regs:$s,
   6278            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6279            Int16Regs:$r)>;
   6280 
   6281 def : Pat<(int_nvvm_sust_b_3d_i16_trap
   6282            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6283            Int16Regs:$r),
   6284           (SUST_B_3D_B16_TRAP Int64Regs:$s,
   6285            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6286            Int16Regs:$r)>;
   6287 
   6288 def : Pat<(int_nvvm_sust_b_3d_i32_trap
   6289            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6290            Int32Regs:$r),
   6291           (SUST_B_3D_B32_TRAP Int64Regs:$s,
   6292            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6293            Int32Regs:$r)>;
   6294 
   6295 def : Pat<(int_nvvm_sust_b_3d_i64_trap
   6296            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6297            Int64Regs:$r),
   6298           (SUST_B_3D_B64_TRAP Int64Regs:$s,
   6299            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6300            Int64Regs:$r)>;
   6301 
   6302 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
   6303            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6304            Int16Regs:$r, Int16Regs:$g),
   6305           (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
   6306            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6307            Int16Regs:$r, Int16Regs:$g)>;
   6308 
   6309 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
   6310            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6311            Int16Regs:$r, Int16Regs:$g),
   6312           (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
   6313            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6314            Int16Regs:$r, Int16Regs:$g)>;
   6315 
   6316 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
   6317            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6318            Int32Regs:$r, Int32Regs:$g),
   6319           (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
   6320            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6321            Int32Regs:$r, Int32Regs:$g)>;
   6322 
   6323 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
   6324            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6325            Int64Regs:$r, Int64Regs:$g),
   6326           (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
   6327            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6328            Int64Regs:$r, Int64Regs:$g)>;
   6329 
   6330 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
   6331            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6332            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6333           (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
   6334            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6335            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6336 
   6337 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
   6338            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6339            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6340           (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
   6341            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6342            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6343 
   6344 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
   6345            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6346            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6347           (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
   6348            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6349            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6350 
   6351 
   6352 // .zero variant: int_nvvm_sust_b_*_zero intrinsics select SUST_B_*_ZERO.
        //
        // sust_b, 1D, _zero: every int_nvvm_sust_b_1d_*_zero intrinsic below
        // selects the matching SUST_B_1D_*_ZERO instruction.  Operands are
        // forwarded unchanged and in order: $s (Int64Regs), $x (Int32Regs), then
        // one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$x are presumably surface handle and x coordinate,
        // and _zero presumably names the PTX sust out-of-range mode -- confirm.
   6353 def : Pat<(int_nvvm_sust_b_1d_i8_zero
   6354            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6355           (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6356 
   6357 def : Pat<(int_nvvm_sust_b_1d_i16_zero
   6358            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6359           (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6360 
   6361 def : Pat<(int_nvvm_sust_b_1d_i32_zero
   6362            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6363           (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6364 
   6365 def : Pat<(int_nvvm_sust_b_1d_i64_zero
   6366            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
   6367           (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
   6368 
   6369 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
   6370            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6371           (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
   6372            Int16Regs:$r, Int16Regs:$g)>;
   6373 
   6374 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
   6375            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6376           (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
   6377            Int16Regs:$r, Int16Regs:$g)>;
   6378 
   6379 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
   6380            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6381           (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
   6382            Int32Regs:$r, Int32Regs:$g)>;
   6383 
   6384 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
   6385            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6386           (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
   6387            Int64Regs:$r, Int64Regs:$g)>;
   6388 
   6389 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
   6390            Int64Regs:$s, Int32Regs:$x,
   6391            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6392           (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
   6393            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6394 
   6395 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
   6396            Int64Regs:$s, Int32Regs:$x,
   6397            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6398           (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
   6399            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6400 
   6401 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
   6402            Int64Regs:$s, Int32Regs:$x,
   6403            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6404           (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
   6405            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6406 
   6407 
   6408 // sust_b, 1D array, _zero: every int_nvvm_sust_b_1d_array_*_zero intrinsic
        // below selects the matching SUST_B_1D_ARRAY_*_ZERO instruction.  Operands
        // are forwarded unchanged and in order: $s (Int64Regs), $l and $x
        // (Int32Regs), then one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$l/$x are presumably surface handle, array layer and
        // x coordinate -- confirm against the SUST_* instruction definitions.
   6409 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
   6410            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6411           (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6412            Int16Regs:$r)>;
   6413 
   6414 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
   6415            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6416           (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6417            Int16Regs:$r)>;
   6418 
   6419 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
   6420            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6421           (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6422            Int32Regs:$r)>;
   6423 
   6424 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
   6425            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
   6426           (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6427            Int64Regs:$r)>;
   6428 
   6429 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
   6430           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6431           (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6432            Int16Regs:$r, Int16Regs:$g)>;
   6433 
   6434 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
   6435           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6436           (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6437            Int16Regs:$r, Int16Regs:$g)>;
   6438 
   6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
   6440           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6441           (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6442            Int32Regs:$r, Int32Regs:$g)>;
   6443 
   6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
   6445           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
   6446           (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6447            Int64Regs:$r, Int64Regs:$g)>;
   6448 
   6449 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
   6450            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6451            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6452           (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6453            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6454 
   6455 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
   6456            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6457            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6458           (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6459            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6460 
   6461 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
   6462            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6463            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6464           (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6465            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6466 
   6467 
   6468 // sust_b, 2D, _zero: every int_nvvm_sust_b_2d_*_zero intrinsic below selects
        // the matching SUST_B_2D_*_ZERO instruction.  Operands are forwarded
        // unchanged and in order: $s (Int64Regs), $x and $y (Int32Regs), then one,
        // two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$x/$y are presumably surface handle and x/y
        // coordinates -- confirm against the SUST_* instruction definitions.
   6469 def : Pat<(int_nvvm_sust_b_2d_i8_zero
   6470            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6471           (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6472            Int16Regs:$r)>;
   6473 
   6474 def : Pat<(int_nvvm_sust_b_2d_i16_zero
   6475            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6476           (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6477            Int16Regs:$r)>;
   6478 
   6479 def : Pat<(int_nvvm_sust_b_2d_i32_zero
   6480            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6481           (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6482            Int32Regs:$r)>;
   6483 
   6484 def : Pat<(int_nvvm_sust_b_2d_i64_zero
   6485            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6486           (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6487            Int64Regs:$r)>;
   6488 
   6489 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
   6490           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6491           (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6492            Int16Regs:$r, Int16Regs:$g)>;
   6493 
   6494 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
   6495           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6496           (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6497            Int16Regs:$r, Int16Regs:$g)>;
   6498 
   6499 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
   6500           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6501           (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6502            Int32Regs:$r, Int32Regs:$g)>;
   6503 
   6504 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
   6505           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
   6506           (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6507            Int64Regs:$r, Int64Regs:$g)>;
   6508 
   6509 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
   6510            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6511            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6512           (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6513            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6514 
   6515 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
   6516            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6517            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6518           (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6519            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6520 
   6521 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
   6522            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6523            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6524           (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6525            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6526 
   6527 
   6528 // sust_b, 2D array, _zero: every int_nvvm_sust_b_2d_array_*_zero intrinsic
        // below selects the matching SUST_B_2D_ARRAY_*_ZERO instruction.  Operands
        // are forwarded unchanged and in order: $s (Int64Regs), $l, $x and $y
        // (Int32Regs), then one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$l/$x/$y are presumably surface handle, array layer
        // and x/y coordinates -- confirm against the SUST_* instruction
        // definitions.
   6529 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
   6530           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6531           (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
   6532            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6533            Int16Regs:$r)>;
   6534 
   6535 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
   6536           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6537           (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
   6538            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6539            Int16Regs:$r)>;
   6540 
   6541 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
   6542           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6543           (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
   6544            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6545            Int32Regs:$r)>;
   6546 
   6547 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
   6548           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
   6549           (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
   6550            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6551            Int64Regs:$r)>;
   6552 
   6553 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
   6554            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6555            Int16Regs:$r, Int16Regs:$g),
   6556           (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
   6557            Int32Regs:$x, Int32Regs:$y,
   6558            Int16Regs:$r, Int16Regs:$g)>;
   6559 
   6560 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
   6561            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6562            Int16Regs:$r, Int16Regs:$g),
   6563           (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
   6564            Int32Regs:$x, Int32Regs:$y,
   6565            Int16Regs:$r, Int16Regs:$g)>;
   6566 
   6567 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
   6568            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6569            Int32Regs:$g),
   6570           (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
   6571            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6572 
   6573 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
   6574            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
   6575            Int64Regs:$g),
   6576           (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
   6577            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
   6578 
   6579 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
   6580            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6581            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6582           (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
   6583            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6584            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6585 
   6586 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
   6587            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6588            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6589           (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
   6590            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6591            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6592 
   6593 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
   6594            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6595            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6596           (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
   6597            Int32Regs:$x, Int32Regs:$y,
   6598            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6599 
   6600 
   6601 // sust_b, 3D, _zero: every int_nvvm_sust_b_3d_*_zero intrinsic below selects
        // the matching SUST_B_3D_*_ZERO instruction.  Operands are forwarded
        // unchanged and in order: $s (Int64Regs), $x, $y and $z (Int32Regs), then
        // one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs and i64 uses
        // Int64Regs.  There is no v4i64 form in this group.
        // NOTE(review): $s/$x/$y/$z are presumably surface handle and x/y/z
        // coordinates -- confirm against the SUST_* instruction definitions.
   6602 def : Pat<(int_nvvm_sust_b_3d_i8_zero
   6603            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6604            Int16Regs:$r),
   6605           (SUST_B_3D_B8_ZERO Int64Regs:$s,
   6606            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6607            Int16Regs:$r)>;
   6608 
   6609 def : Pat<(int_nvvm_sust_b_3d_i16_zero
   6610            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6611            Int16Regs:$r),
   6612           (SUST_B_3D_B16_ZERO Int64Regs:$s,
   6613            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6614            Int16Regs:$r)>;
   6615 
   6616 def : Pat<(int_nvvm_sust_b_3d_i32_zero
   6617            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6618            Int32Regs:$r),
   6619           (SUST_B_3D_B32_ZERO Int64Regs:$s,
   6620            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6621            Int32Regs:$r)>;
   6622 
   6623 def : Pat<(int_nvvm_sust_b_3d_i64_zero
   6624            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6625            Int64Regs:$r),
   6626           (SUST_B_3D_B64_ZERO Int64Regs:$s,
   6627            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6628            Int64Regs:$r)>;
   6629 
   6630 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
   6631            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6632            Int16Regs:$r, Int16Regs:$g),
   6633           (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
   6634            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6635            Int16Regs:$r, Int16Regs:$g)>;
   6636 
   6637 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
   6638            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6639            Int16Regs:$r, Int16Regs:$g),
   6640           (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
   6641            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6642            Int16Regs:$r, Int16Regs:$g)>;
   6643 
   6644 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
   6645            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6646            Int32Regs:$r, Int32Regs:$g),
   6647           (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
   6648            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6649            Int32Regs:$r, Int32Regs:$g)>;
   6650 
   6651 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
   6652            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6653            Int64Regs:$r, Int64Regs:$g),
   6654           (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
   6655            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6656            Int64Regs:$r, Int64Regs:$g)>;
   6657 
   6658 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
   6659            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6660            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6661           (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
   6662            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6663            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6664 
   6665 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
   6666            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6667            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6668           (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
   6669            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6670            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6671 
   6672 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
   6673            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6674            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6675           (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
   6676            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6677            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6678 
   6679 
   6680 
   6681 // sust_p, 1D, _trap: every int_nvvm_sust_p_1d_*_trap intrinsic below selects
        // the matching SUST_P_1D_*_TRAP instruction.  Operands are forwarded
        // unchanged and in order: $s (Int64Regs), $x (Int32Regs), then one, two
        // or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs.  Unlike the
        // sust_b groups, this group has no i64 or v2i64 form.
        // NOTE(review): $s/$x are presumably surface handle and x coordinate,
        // and sust_p presumably is the PTX formatted-store flavour -- confirm.
   6682 def : Pat<(int_nvvm_sust_p_1d_i8_trap
   6683            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6684           (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6685 
   6686 def : Pat<(int_nvvm_sust_p_1d_i16_trap
   6687            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
   6688           (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
   6689 
   6690 def : Pat<(int_nvvm_sust_p_1d_i32_trap
   6691            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
   6692           (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
   6693 
   6694 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
   6695            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6696           (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6697            Int16Regs:$r, Int16Regs:$g)>;
   6698 
   6699 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
   6700            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6701           (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6702            Int16Regs:$r, Int16Regs:$g)>;
   6703 
   6704 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
   6705            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6706           (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6707            Int32Regs:$r, Int32Regs:$g)>;
   6708 
   6709 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
   6710            Int64Regs:$s, Int32Regs:$x,
   6711            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6712           (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
   6713            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6714 
   6715 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
   6716            Int64Regs:$s, Int32Regs:$x,
   6717            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6718           (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
   6719            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6720 
   6721 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
   6722            Int64Regs:$s, Int32Regs:$x,
   6723            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6724           (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
   6725            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6726 
   6727 
   6728 // sust_p, 1D array, _trap: every int_nvvm_sust_p_1d_array_*_trap intrinsic
        // below selects the matching SUST_P_1D_ARRAY_*_TRAP instruction.  Operands
        // are forwarded unchanged and in order: $s (Int64Regs), $l and $x
        // (Int32Regs), then one, two or four data values ($r, $g, $b, $a).
        // i8 and i16 data both use Int16Regs; i32 uses Int32Regs.  No i64 or
        // v2i64 form appears between the i32 and v2i8 / v2i32 and v4i8 entries.
        // NOTE(review): $s/$l/$x are presumably surface handle, array layer and
        // x coordinate -- confirm against the SUST_* instruction definitions.
   6729 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
   6730            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6731           (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6732            Int16Regs:$r)>;
   6733 
   6734 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
   6735            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
   6736           (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6737            Int16Regs:$r)>;
   6738 
   6739 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
   6740            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
   6741           (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6742            Int32Regs:$r)>;
   6743 
   6744 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
   6745           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6746           (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6747            Int16Regs:$r, Int16Regs:$g)>;
   6748 
   6749 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
   6750           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
   6751           (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6752            Int16Regs:$r, Int16Regs:$g)>;
   6753 
   6754 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
   6755           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
   6756           (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6757            Int32Regs:$r, Int32Regs:$g)>;
   6758 
   6759 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
   6760            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6761            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6762           (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6763            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6764 
   6765 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
   6766            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6767            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6768           (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6769            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6770 
   6771 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
   6772            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6773            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6774           (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
   6775            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6776 
   6777 
   6778 
   // Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
   // Each pattern rewrites the intrinsic to the matching
   // SUST_P_2D_{B,V2B,V4B}{8,16,32}_TRAP instruction, forwarding the operands
   // unchanged and in the same order: $s (Int64Regs), $x and $y (both
   // Int32Regs), then one, two or four data operands ($r[, $g[, $b, $a]]).
   // The i8 and i16 variants both take their data operands in Int16Regs; only
   // the i32 variants use Int32Regs.
   // NOTE(review): $s is presumably the surface handle and $x/$y the
   // coordinates -- confirm against the SUST_P_2D_* instruction definitions.
   6779 def : Pat<(int_nvvm_sust_p_2d_i8_trap
   6780            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6781           (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6782            Int16Regs:$r)>;
   6783 
   6784 def : Pat<(int_nvvm_sust_p_2d_i16_trap
   6785            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6786           (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6787            Int16Regs:$r)>;
   6788 
   6789 def : Pat<(int_nvvm_sust_p_2d_i32_trap
   6790            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6791           (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6792            Int32Regs:$r)>;
   6793 
   // Two-component variants: data operands are $r, $g.
   6794 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
   6795           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6796           (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6797            Int16Regs:$r, Int16Regs:$g)>;
   6798 
   6799 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
   6800           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
   6801           (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6802            Int16Regs:$r, Int16Regs:$g)>;
   6803 
   6804 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
   6805           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
   6806           (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6807            Int32Regs:$r, Int32Regs:$g)>;
   6808 
   // Four-component variants: data operands are $r, $g, $b, $a.
   6809 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
   6810            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6811            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6812           (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6813            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6814 
   6815 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
   6816            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6817            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6818           (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6819            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6820 
   6821 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
   6822            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6823            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6824           (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
   6825            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6826 
   6827 
   6828 
   // Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
   // Each pattern rewrites the intrinsic to the matching
   // SUST_P_2D_ARRAY_{B,V2B,V4B}{8,16,32}_TRAP instruction, forwarding the
   // operands unchanged and in the same order: $s (Int64Regs), $l, $x and $y
   // (all Int32Regs), then one, two or four data operands
   // ($r[, $g[, $b, $a]]).
   // The i8 and i16 variants both take their data operands in Int16Regs; only
   // the i32 variants use Int32Regs.
   // NOTE(review): $l is presumably the array layer index and $s the surface
   // handle -- confirm against the SUST_P_2D_ARRAY_* instruction definitions.
   6829 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
   6830           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6831           (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
   6832            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6833            Int16Regs:$r)>;
   6834 
   6835 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
   6836           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
   6837           (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
   6838            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6839            Int16Regs:$r)>;
   6840 
   6841 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
   6842           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
   6843           (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
   6844            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6845            Int32Regs:$r)>;
   6846 
   // Two-component variants: data operands are $r, $g.
   6847 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
   6848            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6849            Int16Regs:$r, Int16Regs:$g),
   6850           (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
   6851            Int32Regs:$x, Int32Regs:$y,
   6852            Int16Regs:$r, Int16Regs:$g)>;
   6853 
   6854 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
   6855            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6856            Int16Regs:$r, Int16Regs:$g),
   6857           (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
   6858            Int32Regs:$x, Int32Regs:$y,
   6859            Int16Regs:$r, Int16Regs:$g)>;
   6860 
   6861 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
   6862            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
   6863            Int32Regs:$g),
   6864           (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6865            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
   6866 
   // Four-component variants: data operands are $r, $g, $b, $a.
   6867 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
   6868            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6869            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6870           (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
   6871            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6872            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6873 
   6874 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
   6875            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6876            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6877           (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
   6878            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6879            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6880 
   6881 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
   6882            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
   6883            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6884           (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
   6885            Int32Regs:$x, Int32Regs:$y,
   6886            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6887 
   6888 
   6889 
   // Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
   // Each pattern rewrites the intrinsic to the matching
   // SUST_P_3D_{B,V2B,V4B}{8,16,32}_TRAP instruction, forwarding the operands
   // unchanged and in the same order: $s (Int64Regs), $x, $y and $z (all
   // Int32Regs), then one, two or four data operands ($r[, $g[, $b, $a]]).
   // The i8 and i16 variants both take their data operands in Int16Regs; only
   // the i32 variants use Int32Regs.
   // NOTE(review): $s is presumably the surface handle and $x/$y/$z the
   // coordinates -- confirm against the SUST_P_3D_* instruction definitions.
   6890 def : Pat<(int_nvvm_sust_p_3d_i8_trap
   6891            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6892            Int16Regs:$r),
   6893           (SUST_P_3D_B8_TRAP Int64Regs:$s,
   6894            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6895            Int16Regs:$r)>;
   6896 
   6897 def : Pat<(int_nvvm_sust_p_3d_i16_trap
   6898            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6899            Int16Regs:$r),
   6900           (SUST_P_3D_B16_TRAP Int64Regs:$s,
   6901            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6902            Int16Regs:$r)>;
   6903 
   6904 def : Pat<(int_nvvm_sust_p_3d_i32_trap
   6905            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6906            Int32Regs:$r),
   6907           (SUST_P_3D_B32_TRAP Int64Regs:$s,
   6908            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6909            Int32Regs:$r)>;
   6910 
   // Two-component variants: data operands are $r, $g.
   6911 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
   6912            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6913            Int16Regs:$r, Int16Regs:$g),
   6914           (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
   6915            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6916            Int16Regs:$r, Int16Regs:$g)>;
   6917 
   6918 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
   6919            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6920            Int16Regs:$r, Int16Regs:$g),
   6921           (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
   6922            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6923            Int16Regs:$r, Int16Regs:$g)>;
   6924 
   6925 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
   6926            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6927            Int32Regs:$r, Int32Regs:$g),
   6928           (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
   6929            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6930            Int32Regs:$r, Int32Regs:$g)>;
   6931 
   // Four-component variants: data operands are $r, $g, $b, $a.
   6932 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
   6933            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6934            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6935           (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
   6936            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6937            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6938 
   6939 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
   6940            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6941            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
   6942           (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
   6943            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6944            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
   6945 
   6946 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
   6947            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6948            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
   6949           (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
   6950            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
   6951            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
   6952 
   6953 
   6954 
   6955 //===-- Old PTX Back-end Intrinsics ---------------------------------------===//
   6956 
   6957 // These intrinsics are handled to retain compatibility with the old backend.
   6958 
   6959 // PTX Special Purpose Register Accessor Intrinsics
   6960 
   // Instruction template for reading a special register into a 64-bit
   // register.
   //   regname - register name without the leading '%'; it is spliced into
   //             the asm string, giving "mov.u64\t$d, %<regname>;".
   //   intop   - the zero-argument intrinsic this instruction is selected for.
   // The instruction has no inputs and a single Int64Regs output $d, which the
   // pattern sets to the result of (intop).
   6961 class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
   6962   : NVPTXInst<(outs Int64Regs:$d), (ins),
   6963               !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
   6964               [(set Int64Regs:$d, (intop))]>;
   6965 
   // Instruction template for reading a special register into a 32-bit
   // register.
   //   regname - register name without the leading '%'; it is spliced into
   //             the asm string, giving "mov.u32\t$d, %<regname>;".
   //   intop   - the zero-argument intrinsic this instruction is selected for.
   // The instruction has no inputs and a single Int32Regs output $d, which the
   // pattern sets to the result of (intop).
   6966 class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
   6967   : NVPTXInst<(outs Int32Regs:$d), (ins),
   6968               !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
   6969               [(set Int32Regs:$d, (intop))]>;
   6970 
   6971 // TODO: Add vector versions of the special-register reads.
   6972 
   // 32-bit reads of %tid.x, %tid.y, %tid.z and %tid.w, one instruction per
   // component, each selected for the matching int_ptx_read_tid_* intrinsic.
   6973 def PTX_READ_TID_X   : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
   6974                                                      int_ptx_read_tid_x>;
   6975 def PTX_READ_TID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
   6976                                                      int_ptx_read_tid_y>;
   6977 def PTX_READ_TID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
   6978                                                      int_ptx_read_tid_z>;
   6979 def PTX_READ_TID_W   : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
   6980                                                      int_ptx_read_tid_w>;
   6981 
   // 32-bit reads of %ntid.x, %ntid.y, %ntid.z and %ntid.w, one instruction
   // per component, each selected for the matching int_ptx_read_ntid_*
   // intrinsic.
   6982 def PTX_READ_NTID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
   6983                                                       int_ptx_read_ntid_x>;
   6984 def PTX_READ_NTID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
   6985                                                       int_ptx_read_ntid_y>;
   6986 def PTX_READ_NTID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
   6987                                                       int_ptx_read_ntid_z>;
   6988 def PTX_READ_NTID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
   6989                                                       int_ptx_read_ntid_w>;
   6990 
   // 32-bit reads of %laneid, %warpid and %nwarpid, each selected for the
   // matching int_ptx_read_* intrinsic.
   6991 def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
   6992                                                      int_ptx_read_laneid>;
   6993 def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
   6994                                                      int_ptx_read_warpid>;
   6995 def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
   6996                                                      int_ptx_read_nwarpid>;
   6997 
   // 32-bit reads of %ctaid.x, %ctaid.y, %ctaid.z and %ctaid.w, one
   // instruction per component, each selected for the matching
   // int_ptx_read_ctaid_* intrinsic.
   6998 def PTX_READ_CTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
   6999                                                        int_ptx_read_ctaid_x>;
   7000 def PTX_READ_CTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
   7001                                                        int_ptx_read_ctaid_y>;
   7002 def PTX_READ_CTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
   7003                                                        int_ptx_read_ctaid_z>;
   7004 def PTX_READ_CTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
   7005                                                        int_ptx_read_ctaid_w>;
   7006 
   // 32-bit reads of %nctaid.x, %nctaid.y, %nctaid.z and %nctaid.w, one
   // instruction per component, each selected for the matching
   // int_ptx_read_nctaid_* intrinsic.
   7007 def PTX_READ_NCTAID_X   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
   7008                                                         int_ptx_read_nctaid_x>;
   7009 def PTX_READ_NCTAID_Y   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
   7010                                                         int_ptx_read_nctaid_y>;
   7011 def PTX_READ_NCTAID_Z   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
   7012                                                         int_ptx_read_nctaid_z>;
   7013 def PTX_READ_NCTAID_W   : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
   7014                                                         int_ptx_read_nctaid_w>;
   7015 
   // 32-bit reads of %smid, %nsmid and %gridid, each selected for the matching
   // int_ptx_read_* intrinsic.
   7016 def PTX_READ_SMID  : PTX_READ_SPECIAL_REGISTER_R32<"smid",
   7017                                                    int_ptx_read_smid>;
   7018 def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
   7019                                                     int_ptx_read_nsmid>;
   7020 def PTX_READ_GRIDID  : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
   7021                                                      int_ptx_read_gridid>;
   7022 
   // 32-bit reads of the %lanemask_{eq,le,lt,ge,gt} registers, each selected
   // for the matching int_ptx_read_lanemask_* intrinsic.
   7023 def PTX_READ_LANEMASK_EQ
   7024   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
   7025 def PTX_READ_LANEMASK_LE
   7026   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
   7027 def PTX_READ_LANEMASK_LT
   7028   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
   7029 def PTX_READ_LANEMASK_GE
   7030   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
   7031 def PTX_READ_LANEMASK_GT
   7032   : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
   7033 
   // Reads of %clock and %clock64. %clock uses the 32-bit template; %clock64
   // is the only definition here built on the 64-bit template, so its result
   // lands in an Int64Regs register.
   7034 def PTX_READ_CLOCK
   7035   : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
   7036 def PTX_READ_CLOCK64
   7037   : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
   7038 
   // 32-bit reads of %pm0..%pm3, each selected for the matching
   // int_ptx_read_pm* intrinsic.
   // NOTE(review): these are presumably the PTX performance-monitor counters;
   // confirm against the PTX ISA special-register list.
   7039 def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
   7040 def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
   7041 def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
   7042 def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
   7043 
   7044 // PTX Parallel Synchronization and Communication Intrinsics
   7045 
   // Selected for int_ptx_bar_sync with an immediate operand: takes the i32
   // immediate $i as its only input, produces no outputs, and prints
   // "bar.sync\t$i;".
   7046 def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
   7047                              [(int_ptx_bar_sync imm:$i)]>;
   7048