//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the node's APFloat
// value and compares it against the constant named by the record.
def immFloat0 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 1.0;
}]>;



//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = 1 in {

// bar.sync with a literal barrier 0, with one register operand, and with two
// register operands.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// bar.red on barrier 0. The i32 input is first converted to a predicate with
// setp.ne.u32 against 0 inside a braced block that declares its own
// predicate registers; the and/or forms turn the resulting predicate back
// into 1/0 with selp.u32.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                           ".reg .pred \t%p1; \n\t",
                           "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                           "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
                           "}}"),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                           ".reg .pred \t%p1; \n\t",
                           ".reg .pred \t%p2; \n\t",
                           "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                           "bar.red.and.pred \t%p2, 0, %p1; \n\t",
                           "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                           "}}"),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                           ".reg .pred \t%p1; \n\t",
                           ".reg .pred \t%p2; \n\t",
                           "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                           "bar.red.or.pred \t%p2, 0, %p1; \n\t",
                           "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                           "}}"),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;

// bar.sync with an immediate operand.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, immediate (_I) and register (_R) operand forms.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync with a single operand, immediate (_I) and register (_R) forms.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync with $id and $cnt operands. The suffix letters give the kind
// of $id and then $cnt: R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;


// shfl.{up,down,bfly,idx}.b32
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // $offset and $mask may each be a register or an immediate. ptxas is smart
  // enough to inline constant registers, so the immediate forms are not
  // strictly required, but they are cheap to provide and make the emitted
  // PTX more readable.

  // Register $offset, register $mask.
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
                  "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
                  [(set regclass:$dst,
                        (IntOp regclass:$src, Int32Regs:$offset,
                               Int32Regs:$mask))]>;

  // Immediate $offset, register $mask.
  def imm1
      : NVPTXInst<(outs regclass:$dst),
                  (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
                  "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
                  [(set regclass:$dst,
                        (IntOp regclass:$src, imm:$offset,
                               Int32Regs:$mask))]>;

  // Register $offset, immediate $mask.
  def imm2
      : NVPTXInst<(outs regclass:$dst),
                  (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
                  "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
                  [(set regclass:$dst,
                        (IntOp regclass:$src, Int32Regs:$offset,
                               imm:$mask))]>;

  // Immediate $offset, immediate $mask.
  def imm3
      : NVPTXInst<(outs regclass:$dst),
                  (ins regclass:$src, i32imm:$offset, i32imm:$mask),
                  "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
                  [(set regclass:$dst,
                        (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
}

defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs,   "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32   : SHFL<Int32Regs,   "up",   int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32   : SHFL<Float32Regs, "up",   int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs,   "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32  : SHFL<Int32Regs,   "idx",  int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32  : SHFL<Float32Regs, "idx",  int_nvvm_shfl_idx_f32>;

multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // $threadmask, $offset and $mask may each be a register or an immediate.
  // ptxas is smart enough to inline constant registers, so the immediate
  // forms are not strictly required, but they are cheap to provide and make
  // the emitted PTX more readable.
  //
  // The three suffix letters give the kind of $threadmask, $offset and $mask
  // in that order: r = register, i = immediate.
  def rrr
      : NVPTXInst<(outs regclass:$dst),
                  (ins Int32Regs:$threadmask, regclass:$src,
                       Int32Regs:$offset, Int32Regs:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp Int32Regs:$threadmask, regclass:$src,
                               Int32Regs:$offset, Int32Regs:$mask))]>;

  def rri
      : NVPTXInst<(outs regclass:$dst),
                  (ins Int32Regs:$threadmask, regclass:$src,
                       Int32Regs:$offset, i32imm:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp Int32Regs:$threadmask, regclass:$src,
                               Int32Regs:$offset, imm:$mask))]>;

  def rir
      : NVPTXInst<(outs regclass:$dst),
                  (ins Int32Regs:$threadmask, regclass:$src,
                       i32imm:$offset, Int32Regs:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp Int32Regs:$threadmask, regclass:$src,
                               imm:$offset, Int32Regs:$mask))]>;

  def rii
      : NVPTXInst<(outs regclass:$dst),
                  (ins Int32Regs:$threadmask, regclass:$src,
                       i32imm:$offset, i32imm:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp Int32Regs:$threadmask, regclass:$src,
                               imm:$offset, imm:$mask))]>;

  def irr
      : NVPTXInst<(outs regclass:$dst),
                  (ins i32imm:$threadmask, regclass:$src,
                       Int32Regs:$offset, Int32Regs:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp imm:$threadmask, regclass:$src,
                               Int32Regs:$offset, Int32Regs:$mask))]>;

  def iri
      : NVPTXInst<(outs regclass:$dst),
                  (ins i32imm:$threadmask, regclass:$src,
                       Int32Regs:$offset, i32imm:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp imm:$threadmask, regclass:$src,
                               Int32Regs:$offset, imm:$mask))]>;

  def iir
      : NVPTXInst<(outs regclass:$dst),
                  (ins i32imm:$threadmask, regclass:$src,
                       i32imm:$offset, Int32Regs:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp imm:$threadmask, regclass:$src,
                               imm:$offset, Int32Regs:$mask))]>;

  def iii
      : NVPTXInst<(outs regclass:$dst),
                  (ins i32imm:$threadmask, regclass:$src,
                       i32imm:$offset, i32imm:$mask),
                  "shfl.sync." # mode
                      # ".b32 $dst, $src, $offset, $mask, $threadmask;",
                  [(set regclass:$dst,
                        (IntOp imm:$threadmask, regclass:$src,
                               imm:$offset, imm:$mask))]>;
}

// On sm_70 these don't have to be convergent, so we may eventually want to
// implement a non-convergent variant of this intrinsic.
defm INT_SHFL_SYNC_DOWN_I32
    : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32
    : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
defm INT_SHFL_SYNC_UP_I32
    : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32
    : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
defm INT_SHFL_SYNC_BFLY_I32
    : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32
    : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
defm INT_SHFL_SYNC_IDX_I32
    : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32
    : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;


// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: predicate in, `regclass` out.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes $mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.{b32,b64}
// Four operand-kind combinations of $mask and $value; ImmOp is the immediate
// operand type used when $value is an immediate.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  // Immediate $mask, immediate $value.
  def ii : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Register $mask, immediate $value.
  def ir : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Immediate $mask, register $value.
  def ri : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Register $mask, register $value.
  def rr : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;

// match.all.sync.{b32,b64} producing both a value ($dest) and a predicate
// ($pred), printed as "$dest|$pred". Same four operand-kind combinations as
// MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  // Immediate $mask, immediate $value.
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Register $mask, immediate $value.
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Immediate $mask, register $value.
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  // Register $mask, register $value.
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;

} // isConvergent = 1

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// Operand-less instruction that prints StrOp and is selected for IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// fmin(1, fmax(0, a)) is selected as a saturating cvt. All four operand
// orders of the two intrinsics are listed, first for f32 and then for f64.
def : Pat<
    (int_nvvm_fmin_f immFloat1,
                     (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_f immFloat1,
                     (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                     immFloat1),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                     immFloat1),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<
    (int_nvvm_fmin_d immDouble1,
                     (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_d immDouble1,
                     (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                     immDouble1),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                     immDouble1),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// One-source instruction: $dst = IntOP($src0).
// OpcStr is the complete asm string rather than just a mnemonic because we
// need to deal with cases like INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with cases
// like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: $dst = IntOP($src0, $src1).
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source instruction: $dst = IntOP($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1,
                     s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;


//
// Multiplication
//

def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//

def : Pat<
    (int_nvvm_floor_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
    (int_nvvm_floor_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
    (int_nvvm_floor_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<
    (int_nvvm_ceil_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
    (int_nvvm_ceil_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
    (int_nvvm_ceil_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

def : Pat<
    (int_nvvm_round_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
    (int_nvvm_round_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
    (int_nvvm_round_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

def : Pat<
    (int_nvvm_trunc_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
    (int_nvvm_trunc_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
    (int_nvvm_trunc_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

def : Pat<
    (int_nvvm_saturate_ftz_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
    (int_nvvm_saturate_f Float32Regs:$a),
    (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
    (int_nvvm_saturate_d Float64Regs:$a),
    (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
// Remaining f32 square-root records. Each one ties an int_nvvm_sqrt_*
// intrinsic to the "sqrt" instruction whose modifiers appear in its asm
// string.
def INT_NVVM_SQRT_RZ_F : F_MATH_1<
    "sqrt.rz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<
    "sqrt.rm.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<
    "sqrt.rm.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<
    "sqrt.rp.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<
    "sqrt.rp.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<
    "sqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<
    "sqrt.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_sqrt_approx_f>;

// f64 square root, one record per rounding suffix.
def INT_NVVM_SQRT_RN_D : F_MATH_1<
    "sqrt.rn.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<
    "sqrt.rz.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<
    "sqrt.rm.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<
    "sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no instruction of its own; it is rewritten to one of
// the records above. The Requires lists pick between the .rn and .approx
// forms (do_SQRTF32_RN) and between the .ftz and non-.ftz forms (doF32FTZ).
// The last pattern carries no predicate.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs,
Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;

// Remaining reciprocal-square-root records; only .approx forms are defined.
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<
    "rsqrt.approx.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<
    "rsqrt.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
    int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add with the modifiers named in the asm string. F_MATH_2 is the
// two-source counterpart of F_MATH_1 (the asm strings use $src0 and $src1).
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<
    "add.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<
    "add.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<
    "add.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<
    "add.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<
    "add.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<
    "add.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<
    "add.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<
    "add.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one record per rounding suffix.
def INT_NVVM_ADD_RN_D : F_MATH_2<
    "add.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<
    "add.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<
    "add.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<
    "add.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs,
Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// The conversion intrinsics have no dedicated instruction records. Each one
// is rewritten to an existing CVT_<dst>_<src> instruction plus a Cvt* mode
// operand whose name mirrors the intrinsic's rounding/.ftz suffix.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a,
CvtRN)>;

// Conversion intrinsics rewritten to CVT_<dst>_<src> instructions; the Cvt*
// mode operand mirrors the rounding/.ftz suffix of the intrinsic name.

// u32 -> f64 (remaining rounding suffixes)
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def :
Pat<(int_nvvm_i2f_rm Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
// s32 -> f32 (last rounding suffix)
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Build an f64 register from two 32-bit registers with a single mov.b64 whose
// source is the vector {$src0, $src1}.
def INT_NVVM_LOHI_I2D : F_MATH_2<
    "mov.b64 \t$dst, {{$src0, $src1}};",
    Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Extract one 32-bit half of an f64. The mov.b64 unpacks into a two-element
// vector; the unwanted element lands in %temp, a .b32 register declared
// inside the emitted block. $dst is the first element for D2I_LO and the
// second for D2I_HI.
def INT_NVVM_D2I_LO : F_MATH_1<
    !strconcat("{{\n\t",
               ".reg .b32 %temp; \n\t",
               "mov.b64 \t{$dst, %temp}, $src0;\n\t",
               "}}"),
    Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
    !strconcat("{{\n\t",
               ".reg .b32 %temp; \n\t",
               "mov.b64 \t{%temp, $dst}, $src0;\n\t",
               "}}"),
    Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a,
CvtRNI)>;

// Conversion intrinsics rewritten to CVT_<dst>_<src> instructions; the Cvt*
// mode operand mirrors the rounding/.ftz suffix of the intrinsic name.

// f32 -> u64 (remaining suffixes)
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp
Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16: the CVT_f16_f32 result is passed through BITCONVERT_16_F2I
// before it is used as the intrinsic's result.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast
//

// Same-width moves between an integer and a floating-point register class:
// mov.b32 for the 32-bit pair, mov.b64 for the 64-bit pair.
def INT_NVVM_BITCAST_F2I : F_MATH_1<
    "mov.b32 \t$dst, $src0;", Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<
    "mov.b32 \t$dst, $src0;", Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<
    "mov.b64 \t$dst, $src0;", Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<
    "mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Shared shape of every fns.b32 record: a 32-bit result, an asm string that
// prints $mask, $base and $offset, and a hasPTX60 + hasSM30 requirement.
//   ins      - input operand dag; must name $mask, $base and $offset.
//   Operands - selection dag whose value is assigned to $dst.
// The INT_FNS_<m><b><o> records instantiate it once per register/immediate
// combination ('r' = Int32Regs, 'i' = i32imm matched with imm).
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX60, hasSM30]>;

def INT_FNS_rrr : INT_FNS_MBO<
    (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<
    (ins Int32Regs:$mask, Int32Regs:$base,
i32imm:$offset),
    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
// Remaining fns.b32 operand combinations ('r' = Int32Regs, 'i' = immediate),
// in mask/base/offset order.
def INT_FNS_rir : INT_FNS_MBO<
    (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<
    (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// Address-space filters. Each class wraps `frag` in a PatFrag whose C++
// predicate is ChkMemSDNodeAddressSpace(N, <space>), so the fragment only
// matches when that check returns true for the node.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
    }]>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
    }]>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
    }]>;

// Two-operand atomic ("atom<Space><Opc><Type> $dst, [$addr], $b") for one
// pointer register class.
//   ptrclass - register class of $addr.
//   regclass - register class of $dst and of the register form of $b.
//   SpaceStr, OpcStr, TypeStr - pieces pasted after "atom", in that order.
//   IntOp    - fragment matched on ($addr, $b).
//   IMMType, IMM - operand type and matcher used by the immediate form.
//   Pred     - predicate list handed to Requires.
// Emits `reg` ($b in a register) and `imm` ($b immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<Pred>;
  // Same asm string as `reg`; the redundant trailing "" operand was dropped.
  def imm : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
    Requires<Pred>;
}

// Instantiates F_ATOMIC_2_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted block declares a scratch register `temp` of type .s<TypeStr>,
// writes neg.s<TypeStr> of $b into it and feeds it to
// atom<Space><Opc>.u<TypeStr>. TypeStr is therefore the bare width ("32" or
// "64"). Only a register form is defined; IMMType is accepted but unused.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
      !strconcat(
        "{{ \n\t",
        ".reg \t.s", TypeStr, " temp; \n\t",
        "neg.s", TypeStr, " \ttemp, $b; \n\t",
        "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
        "}}"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<Pred>;
}

// Instantiates F_ATOMIC_2_NEG_imp for 32-bit and 64-bit pointers.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                               IntOp, IMMType, Pred> ;
  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                               IntOp, IMMType, Pred> ;
}

// has 3 operands
// Three-operand atomic ("atom<Space><Opc><Type> $dst, [$addr], $b, $c").
// Emits four records: reg (both in registers), imm1 ($b immediate),
// imm2 ($c immediate) and imm3 (both immediate). Immediates are always
// matched with `imm`.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, regclass:$b, regclass:$c),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                 " \t$dst, [$addr], $b, $c;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
    Requires<Pred>;

  def imm1 : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b, regclass:$c),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                 " \t$dst, [$addr], $b, $c;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
    Requires<Pred>;

  // Same asm string as the siblings; the redundant trailing "" was dropped.
  def imm2 : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, regclass:$b, IMMType:$c),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                 " \t$dst, [$addr], $b, $c;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
    Requires<Pred>;

  def imm3 : NVPTXInst<
      (outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                 " \t$dst, [$addr], $b, $c;"),
      [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
    Requires<Pred>;
}

// Instantiates F_ATOMIC_3_imp for 32-bit and 64-bit pointers.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-filtered views of the atomic add fragments
// (_g = global, _s = shared, _gen = generic).
def atomic_load_add_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def
atomic_load_add_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
// Address-space-filtered fragments for atomic add (_g = global, _s = shared,
// _gen = generic). The floating-point ones wrap the
// int_nvvm_atomic_load_add_f32 / _f64 intrinsics.
def atomic_load_add_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_f32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
def atomic_load_add_f64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
def atomic_load_add_f64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;

// 32-bit integer atom.add. The *_GEN_* records use an empty space string;
// the *_USE_G records match the generic fragment but print ".global".
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".add", atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;

// 64-bit integer atom.add.
defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".add", atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".add",
atomic_load_add_64_gen, i64imm, imm>;

// f32 atom.add; immediates are matched with fpimm instead of imm.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
    Float32Regs, ".global", ".f32", ".add", atomic_load_add_f32_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
    Float32Regs, ".shared", ".f32", ".add", atomic_load_add_f32_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
    Float32Regs, "", ".f32", ".add", atomic_load_add_f32_gen, f32imm, fpimm>;

// f64 atom.add; additionally gated on the hasAtomAddF64 predicate.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<
    Float64Regs, ".global", ".f64", ".add", atomic_load_add_f64_g, f64imm, fpimm,
    [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
    Float64Regs, ".shared", ".f64", ".add", atomic_load_add_f64_s, f64imm, fpimm,
    [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
    Float64Regs, "", ".f64", ".add", atomic_load_add_f64_gen, f64imm, fpimm,
    [hasAtomAddF64]>;

// atom_sub

// Address-space-filtered fragments for atomic subtract.
def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;

// Subtract is built from F_ATOMIC_2_NEG with opcode ".add": the operand is
// negated first and then added. The type string is the bare width ("32" or
// "64") because the multiclass supplies the ".s"/".u" prefixes itself.
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
    Int32Regs, ".global", "32", ".add", atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
    Int64Regs, ".global", "64", ".add", atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
    Int32Regs, "", "32", ".add", atomic_load_sub_32_gen, i32imm>;
defm
INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
    Int32Regs, ".global", "32", ".add", atomic_load_sub_32_gen, i32imm>;
// Remaining atomic-subtract records (negate, then atom.add).
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
    Int32Regs, ".shared", "32", ".add", atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
    Int64Regs, ".shared", "64", ".add", atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
    Int64Regs, "", "64", ".add", atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<
    Int64Regs, ".global", "64", ".add", atomic_load_sub_64_gen, i64imm>;

// atom_swap

// Address-space-filtered fragments for atomic exchange
// (_g = global, _s = shared, _gen = generic).
def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;

// atom.exch on .b32 / .b64. The *_USE_G records match the generic fragment
// but print ".global".
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".b32", ".exch", atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".b64", ".exch", atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;

// atom_max

// Address-space-filtered fragments for atomic max (atomic_load_max_*) and
// atomic umax (atomic_load_umax_*).
def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;

// atom.max: the atomic_load_max_* fragments print with .s32/.s64 types.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".s32", ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".s64", ".max", atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;

// atom.max: the atomic_load_umax_* fragments print with .u32/.u64 types.
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".max", atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;

// atom_min

def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
// Address-space-filtered fragments for atomic min (atomic_load_min_*) and
// atomic umin (atomic_load_umin_*); _g = global, _s = shared, _gen = generic.
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;

// atom.min: the atomic_load_min_* fragments print with .s32/.s64 types.
// The *_USE_G records match the generic fragment but print ".global".
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".s32", ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".min",
atomic_load_min_64_g, i64imm, imm>;
// Remaining 64-bit atom.min records for the atomic_load_min_* fragments.
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".s64", ".min", atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;

// atom.min: the atomic_load_umin_* fragments print with .u32/.u64 types.
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".min", atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;

// atom_inc atom_dec

// Address-space-filtered fragments over the int_nvvm_atomic_load_inc_32
// intrinsic (the dec fragments follow).
def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b),
(int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1355 def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1356 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1357 def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1358 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1359 1360 defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc", 1361 atomic_load_inc_32_g, i32imm, imm>; 1362 defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc", 1363 atomic_load_inc_32_s, i32imm, imm>; 1364 defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc", 1365 atomic_load_inc_32_gen, i32imm, imm>; 1366 defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1367 ".inc", atomic_load_inc_32_gen, i32imm, imm>; 1368 defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec", 1369 atomic_load_dec_32_g, i32imm, imm>; 1370 defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec", 1371 atomic_load_dec_32_s, i32imm, imm>; 1372 defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", 1373 atomic_load_dec_32_gen, i32imm, imm>; 1374 defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1375 ".dec", atomic_load_dec_32_gen, i32imm, imm>; 1376 1377 // atom_and 1378 1379 def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1380 (atomic_load_and_32 node:$a, node:$b)>; 1381 def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1382 (atomic_load_and_32 node:$a, node:$b)>; 1383 def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1384 (atomic_load_and_32 node:$a, node:$b)>; 1385 def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1386 (atomic_load_and_64 node:$a, node:$b)>; 1387 def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1388 (atomic_load_and_64 node:$a, node:$b)>; 1389 def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops 
node:$a, node:$b), 1390 (atomic_load_and_64 node:$a, node:$b)>; 1391 1392 defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", 1393 atomic_load_and_32_g, i32imm, imm>; 1394 defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", 1395 atomic_load_and_32_s, i32imm, imm>; 1396 defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", 1397 atomic_load_and_32_gen, i32imm, imm>; 1398 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1399 ".and", atomic_load_and_32_gen, i32imm, imm>; 1400 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", 1401 atomic_load_and_64_g, i64imm, imm>; 1402 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", 1403 atomic_load_and_64_s, i64imm, imm>; 1404 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", 1405 atomic_load_and_64_gen, i64imm, imm>; 1406 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1407 ".and", atomic_load_and_64_gen, i64imm, imm>; 1408 1409 // atom_or 1410 1411 def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1412 (atomic_load_or_32 node:$a, node:$b)>; 1413 def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1414 (atomic_load_or_32 node:$a, node:$b)>; 1415 def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1416 (atomic_load_or_32 node:$a, node:$b)>; 1417 def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1418 (atomic_load_or_64 node:$a, node:$b)>; 1419 def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1420 (atomic_load_or_64 node:$a, node:$b)>; 1421 def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1422 (atomic_load_or_64 node:$a, node:$b)>; 1423 1424 defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", 1425 atomic_load_or_32_g, i32imm, imm>; 1426 defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", 
".or", 1427 atomic_load_or_32_gen, i32imm, imm>; 1428 defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1429 ".or", atomic_load_or_32_gen, i32imm, imm>; 1430 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", 1431 atomic_load_or_32_s, i32imm, imm>; 1432 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", 1433 atomic_load_or_64_g, i64imm, imm>; 1434 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", 1435 atomic_load_or_64_gen, i64imm, imm>; 1436 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1437 ".or", atomic_load_or_64_gen, i64imm, imm>; 1438 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", 1439 atomic_load_or_64_s, i64imm, imm>; 1440 1441 // atom_xor 1442 1443 def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1444 (atomic_load_xor_32 node:$a, node:$b)>; 1445 def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1446 (atomic_load_xor_32 node:$a, node:$b)>; 1447 def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1448 (atomic_load_xor_32 node:$a, node:$b)>; 1449 def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1450 (atomic_load_xor_64 node:$a, node:$b)>; 1451 def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1452 (atomic_load_xor_64 node:$a, node:$b)>; 1453 def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1454 (atomic_load_xor_64 node:$a, node:$b)>; 1455 1456 defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", 1457 atomic_load_xor_32_g, i32imm, imm>; 1458 defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", 1459 atomic_load_xor_32_s, i32imm, imm>; 1460 defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", 1461 atomic_load_xor_32_gen, i32imm, imm>; 1462 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1463 
".xor", atomic_load_xor_32_gen, i32imm, imm>; 1464 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", 1465 atomic_load_xor_64_g, i64imm, imm>; 1466 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", 1467 atomic_load_xor_64_s, i64imm, imm>; 1468 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", 1469 atomic_load_xor_64_gen, i64imm, imm>; 1470 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1471 ".xor", atomic_load_xor_64_gen, i64imm, imm>; 1472 1473 // atom_cas 1474 1475 def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1476 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1477 def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1478 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1479 def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1480 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1481 def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1482 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1483 def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1484 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1485 def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1486 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1487 1488 defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1489 atomic_cmp_swap_32_g, i32imm>; 1490 defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1491 atomic_cmp_swap_32_s, i32imm>; 1492 defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1493 atomic_cmp_swap_32_gen, i32imm>; 1494 defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1495 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1496 defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1497 atomic_cmp_swap_64_g, 
i64imm>; 1498 defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1499 atomic_cmp_swap_64_s, i64imm>; 1500 defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1501 atomic_cmp_swap_64_gen, i64imm>; 1502 defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1503 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1504 1505 // Support for scoped atomic operations. Matches 1506 // int_nvvm_atomic_{op}_{space}_{type}_{scope} 1507 // and converts it into the appropriate instruction. 1508 // NOTE: not all possible combinations are implemented 1509 // 'space' is limited to generic as it's the only one needed to support CUDA. 1510 // 'scope' = 'gpu' is default and is handled by regular atomic instructions. 1511 class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds, 1512 dag ins, dag Operands> 1513 : NVPTXInst<(outs regclass:$result), ins, 1514 AsmStr, 1515 [(set regclass:$result, Operands)]>, 1516 Requires<Preds>; 1517 1518 // Define instruction variants for all addressing modes. 1519 multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr, 1520 NVPTXRegClass regclass, Operand ImmType, 1521 SDNode Imm, ValueType ImmTy, 1522 list<Predicate> Preds> { 1523 let AddedComplexity = 1 in { 1524 def : ATOM23_impl<AsmStr, regclass, Preds, 1525 (ins Int32Regs:$src, regclass:$b), 1526 (Intr Int32Regs:$src, regclass:$b)>; 1527 def : ATOM23_impl<AsmStr, regclass, Preds, 1528 (ins Int64Regs:$src, regclass:$b), 1529 (Intr Int64Regs:$src, regclass:$b)>; 1530 } 1531 // tablegen can't infer argument types from Intrinsic (though it can 1532 // from Instruction) so we have to enforce specific type on 1533 // immediates via explicit cast to ImmTy. 
1534 def : ATOM23_impl<AsmStr, regclass, Preds, 1535 (ins Int32Regs:$src, ImmType:$b), 1536 (Intr Int32Regs:$src, (ImmTy Imm:$b))>; 1537 def : ATOM23_impl<AsmStr, regclass, Preds, 1538 (ins Int64Regs:$src, ImmType:$b), 1539 (Intr Int64Regs:$src, (ImmTy Imm:$b))>; 1540 } 1541 1542 multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, 1543 NVPTXRegClass regclass, Operand ImmType, 1544 SDNode Imm, ValueType ImmTy, 1545 list<Predicate> Preds> { 1546 // Variants for register/immediate permutations of $b and $c 1547 let AddedComplexity = 2 in { 1548 def : ATOM23_impl<AsmStr, regclass, Preds, 1549 (ins Int32Regs:$src, regclass:$b, regclass:$c), 1550 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>; 1551 def : ATOM23_impl<AsmStr, regclass, Preds, 1552 (ins Int64Regs:$src, regclass:$b, regclass:$c), 1553 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>; 1554 } 1555 let AddedComplexity = 1 in { 1556 def : ATOM23_impl<AsmStr, regclass, Preds, 1557 (ins Int32Regs:$src, ImmType:$b, regclass:$c), 1558 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>; 1559 def : ATOM23_impl<AsmStr, regclass, Preds, 1560 (ins Int64Regs:$src, ImmType:$b, regclass:$c), 1561 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>; 1562 def : ATOM23_impl<AsmStr, regclass, Preds, 1563 (ins Int32Regs:$src, regclass:$b, ImmType:$c), 1564 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>; 1565 def : ATOM23_impl<AsmStr, regclass, Preds, 1566 (ins Int64Regs:$src, regclass:$b, ImmType:$c), 1567 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>; 1568 } 1569 def : ATOM23_impl<AsmStr, regclass, Preds, 1570 (ins Int32Regs:$src, ImmType:$b, ImmType:$c), 1571 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; 1572 def : ATOM23_impl<AsmStr, regclass, Preds, 1573 (ins Int64Regs:$src, ImmType:$b, ImmType:$c), 1574 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; 1575 } 1576 1577 // Constructs instrinsic name and instruction asm strings. 
// Builds the PTX asm string and resolves the intrinsic record for a
// two-operand scoped atomic, then forwards both to ATOM2P_impl.
//   asm:       atom[.<SpaceStr>][.<ScopeStr>].<OpStr>.<TypeStr>
//              (".<SpaceStr>" is omitted for "gen", ".<ScopeStr>" for "gpu")
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
//              ("_<ScopeStr>" is omitted only when ScopeStr is empty)
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!eq(ScopeStr, ""), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand counterpart of ATOM2N_impl: same naming scheme, but the asm
// string carries an extra $c operand and the work is forwarded to ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!eq(ScopeStr, ""), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
// Two-operand form: instantiates ATOM2N_impl once, with SpaceStr fixed to
// "gen", under the "_gen_" name suffix.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand form: instantiates ATOM3N_impl once, with SpaceStr fixed to
// "gen", under the "_gen_" name suffix.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Each scope variant appends hasAtomScope to the caller-supplied predicates.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The default .gpu scope is not instantiated here; it is covered by the
  // existing atomics that carry no explicit scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// Three-operand version of ATOM2S_impl.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // ".gpu"-scoped atomics are intentionally absent: they do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// atom.add
// NOTE(review): _s32 and _u32 both pass IntTypeStr "i" and differ only in the
// asm type suffix; they presumably resolve to the same intrinsic, so keep the
// declaration order stable unless that is confirmed not to matter.
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// Scoped atomic instantiations, one per PTX atom operation.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.
// Scalar

// Scalar ldu.global: one def per address operand form (32-bit register,
// 64-bit register, imemAny, MEMri, MEMri64). TyStr supplies the type suffix
// and the operand list of the asm string. The pattern lists are empty, so
// these are presumably selected from C++ rather than by TableGen patterns.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16   : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32   : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64   : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16   : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32   : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64   : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32   : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64   : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
// Two-result form; one def per address operand form, no selection patterns.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

// Four-result form of the elementized vector ldu.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.
// Scalar ld.global.nc: one def per address operand form (32-bit register,
// 64-bit register, imemAny, MEMri, MEMri64). TyStr supplies the type suffix
// and the operand list of the asm string. The pattern lists are empty, so
// these are presumably selected from C++ rather than by TableGen patterns.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
// Two-result form; one def per address operand form, no selection patterns.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// Four-result form of the elementized vector ldg.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// cvta.<Str>: three variants keyed on the source/result register widths.
// The mixed 64<-32 variant widens $src with cvt.u64.u32 into a scratch
// register before the cvta and is only enabled under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvt.u64.u32 \t%tmp, $src;\n\t",
                           " cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// cvta.to.<Str>: three variants keyed on the source/result register widths.
// The mixed 32<-64 variant runs cvta.to on 64 bits and then narrows the
// result with cvt.u32.u64; it is only enabled under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           " cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Emitted as a plain register move in both pointer widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics
// Each one is emitted as a single mov of the matching type.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.
There is a type 2036 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym 2037 // instructions are not currently defined. However, we can use the ptr 2038 // variants and the asm printer will do the right thing. 2039 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2040 (MoveParam texternalsym:$src)))), 2041 (nvvm_move_ptr64 texternalsym:$src)>; 2042 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2043 (MoveParam texternalsym:$src)))), 2044 (nvvm_move_ptr32 texternalsym:$src)>; 2045 2046 def texsurf_handles 2047 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), 2048 "mov.u64 \t$result, $src;", []>; 2049 2050 //----------------------------------- 2051 // Compiler Error Warn 2052 // - Just ignore them in codegen 2053 //----------------------------------- 2054 2055 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2056 "// llvm.nvvm.compiler.warn()", 2057 [(int_nvvm_compiler_warn Int32Regs:$a)]>; 2058 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2059 "// llvm.nvvm.compiler.warn()", 2060 [(int_nvvm_compiler_warn Int64Regs:$a)]>; 2061 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2062 "// llvm.nvvm.compiler.error()", 2063 [(int_nvvm_compiler_error Int32Regs:$a)]>; 2064 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2065 "// llvm.nvvm.compiler.error()", 2066 [(int_nvvm_compiler_error Int64Regs:$a)]>; 2067 2068 2069 // isspacep 2070 2071 def ISSPACEP_CONST_32 2072 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2073 "isspacep.const \t$d, $a;", 2074 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>, 2075 Requires<[hasPTX31]>; 2076 def ISSPACEP_CONST_64 2077 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2078 "isspacep.const \t$d, $a;", 2079 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>, 2080 Requires<[hasPTX31]>; 2081 def ISSPACEP_GLOBAL_32 2082 : NVPTXInst<(outs Int1Regs:$d), 
(ins Int32Regs:$a), 2083 "isspacep.global \t$d, $a;", 2084 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2085 def ISSPACEP_GLOBAL_64 2086 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2087 "isspacep.global \t$d, $a;", 2088 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2089 def ISSPACEP_LOCAL_32 2090 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2091 "isspacep.local \t$d, $a;", 2092 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2093 def ISSPACEP_LOCAL_64 2094 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2095 "isspacep.local \t$d, $a;", 2096 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2097 def ISSPACEP_SHARED_32 2098 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2099 "isspacep.shared \t$d, $a;", 2100 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2101 def ISSPACEP_SHARED_64 2102 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2103 "isspacep.shared \t$d, $a;", 2104 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2105 2106 2107 // Special register reads 2108 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2109 (ins SpecialRegs:$r), 2110 "mov.b32 \t$d, $r;", []>; 2111 2112 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2113 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2114 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2115 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2116 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2117 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2118 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2119 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2120 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2121 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2122 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), 
(MOV_SPECIAL ENVREG10)>; 2123 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2124 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2125 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2126 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2127 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2128 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2129 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2130 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2131 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2132 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2133 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2134 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2135 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2136 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2137 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2138 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2139 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2140 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2141 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2142 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2143 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2144 2145 2146 // rotate builtin support 2147 2148 def ROTATE_B32_HW_IMM 2149 : NVPTXInst<(outs Int32Regs:$dst), 2150 (ins Int32Regs:$src, i32imm:$amt), 2151 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2152 [(set Int32Regs:$dst, 2153 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2154 Requires<[hasHWROT32]> ; 2155 2156 def ROTATE_B32_HW_REG 2157 : NVPTXInst<(outs Int32Regs:$dst), 2158 
(ins Int32Regs:$src, Int32Regs:$amt), 2159 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2160 [(set Int32Regs:$dst, 2161 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2162 Requires<[hasHWROT32]> ; 2163 2164 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2165 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2166 Requires<[noHWROT32]> ; 2167 2168 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2169 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2170 Requires<[noHWROT32]> ; 2171 2172 let hasSideEffects = 0 in { 2173 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2174 !strconcat("{{\n\t", 2175 ".reg .b32 %dummy;\n\t", 2176 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2177 "}}"), 2178 []> ; 2179 2180 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2181 !strconcat("{{\n\t", 2182 ".reg .b32 %dummy;\n\t", 2183 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2184 "}}"), 2185 []> ; 2186 } 2187 2188 let hasSideEffects = 0 in { 2189 def PACK_TWO_INT32 2190 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2191 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2192 } 2193 2194 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2195 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2196 (GET_LO_INT64 Int64Regs:$src))> ; 2197 2198 // Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2199 // no side effects. 
2200 let hasSideEffects = 0 in { 2201 def SHF_L_WRAP_B32_IMM 2202 : NVPTXInst<(outs Int32Regs:$dst), 2203 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2204 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2205 Requires<[hasHWROT32]>; 2206 2207 def SHF_L_WRAP_B32_REG 2208 : NVPTXInst<(outs Int32Regs:$dst), 2209 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2210 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2211 Requires<[hasHWROT32]>; 2212 2213 def SHF_R_WRAP_B32_IMM 2214 : NVPTXInst<(outs Int32Regs:$dst), 2215 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2216 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2217 Requires<[hasHWROT32]>; 2218 2219 def SHF_R_WRAP_B32_REG 2220 : NVPTXInst<(outs Int32Regs:$dst), 2221 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2222 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2223 Requires<[hasHWROT32]>; 2224 } 2225 2226 // HW version of rotate 64 2227 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2228 (PACK_TWO_INT32 2229 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2230 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2231 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2232 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2233 Requires<[hasHWROT32]>; 2234 2235 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2236 (PACK_TWO_INT32 2237 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2238 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2239 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2240 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2241 Requires<[hasHWROT32]>; 2242 2243 2244 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2245 (PACK_TWO_INT32 2246 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2247 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2248 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2249 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2250 Requires<[hasHWROT32]>; 2251 2252 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 
// Remainder of the register-amount int_nvvm_rotate_right_b64 pattern (HW path)
// whose `def : Pat<...` header sits on the preceding line.
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
//
// Selected only when the noHWROT32 predicate holds. The immediate forms hand
// ROT64imm_sw both the rotate amount and a transformed copy of it; the
// register forms defer entirely to ROTL64reg_sw / ROTR64reg_sw.
//
// Rotate-left by immediate: the third operand must be produced by SUB_FRM_64,
// mirroring the rotate-right pattern below, which swaps the two amounts.
// The previous SUB_FRM_32 here was the 32-bit transform (used by the
// int_nvvm_rotate_b32 pattern) and is wrong for a 64-bit value.
// NOTE(review): SUB_FRM_64 presumably yields 64 - amt; confirm against its
// definition in NVPTXInstrInfo.td.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
// Rotate-left by register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
// Rotate-right by immediate: same instruction as rotate-left with the two
// amount operands exchanged.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
// Rotate-right by register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
// also be defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = 1, IsTexModeUnified = 0 in {
// Texture fetch instructions using handles
// Each definition returns four 32-bit components ($r, $g, $b, $a) and takes
// two 64-bit operands ($t, $s) followed by the coordinate operand(s).
// None of them carries a selection pattern (the pattern list is empty).
def TEX_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t,
Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), 2301 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2302 "[$t, $s, \\{$x\\}], $lod;", 2303 []>; 2304 def TEX_1D_F32_F32_GRAD 2305 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2306 Float32Regs:$b, Float32Regs:$a), 2307 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2308 Float32Regs:$gradx, Float32Regs:$grady), 2309 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2310 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2311 []>; 2312 def TEX_1D_S32_S32 2313 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2314 Int32Regs:$b, Int32Regs:$a), 2315 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2316 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2317 []>; 2318 def TEX_1D_S32_F32 2319 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2320 Int32Regs:$b, Int32Regs:$a), 2321 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2322 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2323 []>; 2324 def TEX_1D_S32_F32_LEVEL 2325 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2326 Int32Regs:$b, Int32Regs:$a), 2327 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2328 Float32Regs:$lod), 2329 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2330 "[$t, $s, \\{$x\\}], $lod;", 2331 []>; 2332 def TEX_1D_S32_F32_GRAD 2333 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2334 Int32Regs:$b, Int32Regs:$a), 2335 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2336 Float32Regs:$gradx, Float32Regs:$grady), 2337 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2338 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2339 []>; 2340 def TEX_1D_U32_S32 2341 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2342 Int32Regs:$b, Int32Regs:$a), 2343 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2344 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2345 []>; 2346 def TEX_1D_U32_F32 2347 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2348 Int32Regs:$b, Int32Regs:$a), 2349 (ins Int64Regs:$t, Int64Regs:$s, 
Float32Regs:$x), 2350 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2351 []>; 2352 def TEX_1D_U32_F32_LEVEL 2353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2354 Int32Regs:$b, Int32Regs:$a), 2355 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2356 Float32Regs:$lod), 2357 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2358 "[$t, $s, \\{$x\\}], $lod;", 2359 []>; 2360 def TEX_1D_U32_F32_GRAD 2361 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2362 Int32Regs:$b, Int32Regs:$a), 2363 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2364 Float32Regs:$gradx, Float32Regs:$grady), 2365 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2366 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2367 []>; 2368 2369 def TEX_1D_ARRAY_F32_S32 2370 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2371 Float32Regs:$b, Float32Regs:$a), 2372 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2373 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2374 "[$t, $s, \\{$l, $x\\}];", 2375 []>; 2376 def TEX_1D_ARRAY_F32_F32 2377 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2378 Float32Regs:$b, Float32Regs:$a), 2379 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2380 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2381 "[$t, $s, \\{$l, $x\\}];", 2382 []>; 2383 def TEX_1D_ARRAY_F32_F32_LEVEL 2384 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2385 Float32Regs:$b, Float32Regs:$a), 2386 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2387 Float32Regs:$lod), 2388 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2389 "[$t, $s, \\{$l, $x\\}], $lod;", 2390 []>; 2391 def TEX_1D_ARRAY_F32_F32_GRAD 2392 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2393 Float32Regs:$b, Float32Regs:$a), 2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2395 Float32Regs:$gradx, Float32Regs:$grady), 2396 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2397 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2398 []>; 2399 
def TEX_1D_ARRAY_S32_S32 2400 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2401 Int32Regs:$b, Int32Regs:$a), 2402 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2403 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2404 "[$t, $s, \\{$l, $x\\}];", 2405 []>; 2406 def TEX_1D_ARRAY_S32_F32 2407 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2408 Int32Regs:$b, Int32Regs:$a), 2409 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2410 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2411 "[$t, $s, \\{$l, $x\\}];", 2412 []>; 2413 def TEX_1D_ARRAY_S32_F32_LEVEL 2414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2415 Int32Regs:$b, Int32Regs:$a), 2416 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2417 Float32Regs:$lod), 2418 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2419 "[$t, $s, \\{$l, $x\\}], $lod;", 2420 []>; 2421 def TEX_1D_ARRAY_S32_F32_GRAD 2422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2423 Int32Regs:$b, Int32Regs:$a), 2424 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2425 Float32Regs:$gradx, Float32Regs:$grady), 2426 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2427 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2428 []>; 2429 def TEX_1D_ARRAY_U32_S32 2430 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2431 Int32Regs:$b, Int32Regs:$a), 2432 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2433 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2434 "[$t, $s, \\{$l, $x\\}];", 2435 []>; 2436 def TEX_1D_ARRAY_U32_F32 2437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2438 Int32Regs:$b, Int32Regs:$a), 2439 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2440 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2441 "[$t, $s, \\{$l, $x\\}];", 2442 []>; 2443 def TEX_1D_ARRAY_U32_F32_LEVEL 2444 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2445 Int32Regs:$b, Int32Regs:$a), 2446 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2447 Float32Regs:$lod), 2448 
"tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2449 "[$t, $s, \\{$l, $x\\}], $lod;", 2450 []>; 2451 def TEX_1D_ARRAY_U32_F32_GRAD 2452 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2453 Int32Regs:$b, Int32Regs:$a), 2454 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2455 Float32Regs:$gradx, Float32Regs:$grady), 2456 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2457 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2458 []>; 2459 2460 def TEX_2D_F32_S32 2461 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2462 Float32Regs:$b, Float32Regs:$a), 2463 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2464 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2465 "[$t, $s, \\{$x, $y\\}];", 2466 []>; 2467 def TEX_2D_F32_F32 2468 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2469 Float32Regs:$b, Float32Regs:$a), 2470 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2471 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2472 "[$t, $s, \\{$x, $y\\}];", 2473 []>; 2474 def TEX_2D_F32_F32_LEVEL 2475 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2476 Float32Regs:$b, Float32Regs:$a), 2477 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2478 Float32Regs:$lod), 2479 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2480 "[$t, $s, \\{$x, $y\\}], $lod;", 2481 []>; 2482 def TEX_2D_F32_F32_GRAD 2483 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2484 Float32Regs:$b, Float32Regs:$a), 2485 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2486 Float32Regs:$gradx0, Float32Regs:$gradx1, 2487 Float32Regs:$grady0, Float32Regs:$grady1), 2488 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2489 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2490 "\\{$grady0, $grady1\\};", 2491 []>; 2492 def TEX_2D_S32_S32 2493 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2494 Int32Regs:$b, Int32Regs:$a), 2495 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2496 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, 
$a\\}, " 2497 "[$t, $s, \\{$x, $y\\}];", 2498 []>; 2499 def TEX_2D_S32_F32 2500 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2501 Int32Regs:$b, Int32Regs:$a), 2502 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2503 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2504 "[$t, $s, \\{$x, $y\\}];", 2505 []>; 2506 def TEX_2D_S32_F32_LEVEL 2507 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2508 Int32Regs:$b, Int32Regs:$a), 2509 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2510 Float32Regs:$lod), 2511 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2512 "[$t, $s, \\{$x, $y\\}], $lod;", 2513 []>; 2514 def TEX_2D_S32_F32_GRAD 2515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2516 Int32Regs:$b, Int32Regs:$a), 2517 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2518 Float32Regs:$gradx0, Float32Regs:$gradx1, 2519 Float32Regs:$grady0, Float32Regs:$grady1), 2520 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2521 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2522 "\\{$grady0, $grady1\\};", 2523 []>; 2524 def TEX_2D_U32_S32 2525 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2526 Int32Regs:$b, Int32Regs:$a), 2527 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2528 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2529 "[$t, $s, \\{$x, $y\\}];", 2530 []>; 2531 def TEX_2D_U32_F32 2532 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2533 Int32Regs:$b, Int32Regs:$a), 2534 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2535 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2536 "[$t, $s, \\{$x, $y\\}];", 2537 []>; 2538 def TEX_2D_U32_F32_LEVEL 2539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2540 Int32Regs:$b, Int32Regs:$a), 2541 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2542 Float32Regs:$lod), 2543 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2544 "[$t, $s, \\{$x, $y\\}], $lod;", 2545 []>; 2546 def TEX_2D_U32_F32_GRAD 2547 : NVPTXInst<(outs Int32Regs:$r, 
Int32Regs:$g, 2548 Int32Regs:$b, Int32Regs:$a), 2549 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2550 Float32Regs:$gradx0, Float32Regs:$gradx1, 2551 Float32Regs:$grady0, Float32Regs:$grady1), 2552 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2553 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2554 "\\{$grady0, $grady1\\};", 2555 []>; 2556 2557 def TEX_2D_ARRAY_F32_S32 2558 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2559 Float32Regs:$b, Float32Regs:$a), 2560 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2561 Int32Regs:$y), 2562 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2563 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2564 []>; 2565 def TEX_2D_ARRAY_F32_F32 2566 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2567 Float32Regs:$b, Float32Regs:$a), 2568 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2569 Float32Regs:$y), 2570 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2571 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2572 []>; 2573 def TEX_2D_ARRAY_F32_F32_LEVEL 2574 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2575 Float32Regs:$b, Float32Regs:$a), 2576 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2577 Float32Regs:$y, Float32Regs:$lod), 2578 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2579 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2580 []>; 2581 def TEX_2D_ARRAY_F32_F32_GRAD 2582 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2583 Float32Regs:$b, Float32Regs:$a), 2584 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2585 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 2586 Float32Regs:$grady0, Float32Regs:$grady1), 2587 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2588 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2589 "\\{$grady0, $grady1\\};", 2590 []>; 2591 def TEX_2D_ARRAY_S32_S32 2592 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2593 Int32Regs:$b, Int32Regs:$a), 2594 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2595 
Int32Regs:$y), 2596 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2597 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2598 []>; 2599 def TEX_2D_ARRAY_S32_F32 2600 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2601 Int32Regs:$b, Int32Regs:$a), 2602 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2603 Float32Regs:$y), 2604 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2605 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2606 []>; 2607 def TEX_2D_ARRAY_S32_F32_LEVEL 2608 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2609 Int32Regs:$b, Int32Regs:$a), 2610 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2611 Float32Regs:$y, Float32Regs:$lod), 2612 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2613 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2614 []>; 2615 def TEX_2D_ARRAY_S32_F32_GRAD 2616 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2617 Int32Regs:$b, Int32Regs:$a), 2618 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2619 Float32Regs:$y, 2620 Float32Regs:$gradx0, Float32Regs:$gradx1, 2621 Float32Regs:$grady0, Float32Regs:$grady1), 2622 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2623 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2624 "\\{$grady0, $grady1\\};", 2625 []>; 2626 def TEX_2D_ARRAY_U32_S32 2627 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2628 Int32Regs:$b, Int32Regs:$a), 2629 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2630 Int32Regs:$y), 2631 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2632 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2633 []>; 2634 def TEX_2D_ARRAY_U32_F32 2635 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2636 Int32Regs:$b, Int32Regs:$a), 2637 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2638 Float32Regs:$y), 2639 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2640 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2641 []>; 2642 def TEX_2D_ARRAY_U32_F32_LEVEL 2643 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2644 Int32Regs:$b, Int32Regs:$a), 2645 (ins Int64Regs:$t, Int64Regs:$s, 
Int32Regs:$l, Float32Regs:$x, 2646 Float32Regs:$y, Float32Regs:$lod), 2647 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2648 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2649 []>; 2650 def TEX_2D_ARRAY_U32_F32_GRAD 2651 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2652 Int32Regs:$b, Int32Regs:$a), 2653 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2654 Float32Regs:$y, 2655 Float32Regs:$gradx0, Float32Regs:$gradx1, 2656 Float32Regs:$grady0, Float32Regs:$grady1), 2657 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2658 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2659 "\\{$grady0, $grady1\\};", 2660 []>; 2661 2662 def TEX_3D_F32_S32 2663 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2664 Float32Regs:$b, Float32Regs:$a), 2665 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2666 Int32Regs:$z), 2667 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2668 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2669 []>; 2670 def TEX_3D_F32_F32 2671 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2672 Float32Regs:$b, Float32Regs:$a), 2673 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2674 Float32Regs:$z), 2675 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2676 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2677 []>; 2678 def TEX_3D_F32_F32_LEVEL 2679 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2680 Float32Regs:$b, Float32Regs:$a), 2681 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2682 Float32Regs:$z, Float32Regs:$lod), 2683 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2684 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2685 []>; 2686 def TEX_3D_F32_F32_GRAD 2687 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2688 Float32Regs:$b, Float32Regs:$a), 2689 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2690 Float32Regs:$z, 2691 Float32Regs:$gradx0, Float32Regs:$gradx1, 2692 Float32Regs:$gradx2, Float32Regs:$grady0, 2693 Float32Regs:$grady1, Float32Regs:$grady2), 2694 "tex.grad.3d.v4.f32.f32 
\t\\{$r, $g, $b, $a\\}, " 2695 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2696 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2697 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2698 []>; 2699 def TEX_3D_S32_S32 2700 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2701 Int32Regs:$b, Int32Regs:$a), 2702 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2703 Int32Regs:$z), 2704 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2705 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2706 []>; 2707 def TEX_3D_S32_F32 2708 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2709 Int32Regs:$b, Int32Regs:$a), 2710 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2711 Float32Regs:$z), 2712 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2713 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2714 []>; 2715 def TEX_3D_S32_F32_LEVEL 2716 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2717 Int32Regs:$b, Int32Regs:$a), 2718 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2719 Float32Regs:$z, Float32Regs:$lod), 2720 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2721 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2722 []>; 2723 def TEX_3D_S32_F32_GRAD 2724 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2725 Int32Regs:$b, Int32Regs:$a), 2726 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2727 Float32Regs:$z, 2728 Float32Regs:$gradx0, Float32Regs:$gradx1, 2729 Float32Regs:$gradx2, Float32Regs:$grady0, 2730 Float32Regs:$grady1, Float32Regs:$grady2), 2731 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2732 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2733 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2734 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2735 []>; 2736 def TEX_3D_U32_S32 2737 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2738 Int32Regs:$b, Int32Regs:$a), 2739 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2740 Int32Regs:$z), 2741 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2742 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2743 []>; 2744 def TEX_3D_U32_F32 2745 : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2746 Int32Regs:$b, Int32Regs:$a), 2747 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2748 Float32Regs:$z), 2749 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2750 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2751 []>; 2752 def TEX_3D_U32_F32_LEVEL 2753 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2754 Int32Regs:$b, Int32Regs:$a), 2755 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2756 Float32Regs:$z, Float32Regs:$lod), 2757 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2758 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2759 []>; 2760 def TEX_3D_U32_F32_GRAD 2761 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2762 Int32Regs:$b, Int32Regs:$a), 2763 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2764 Float32Regs:$z, 2765 Float32Regs:$gradx0, Float32Regs:$gradx1, 2766 Float32Regs:$gradx2, Float32Regs:$grady0, 2767 Float32Regs:$grady1, Float32Regs:$grady2), 2768 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2769 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2770 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2771 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2772 []>; 2773 2774 def TEX_CUBE_F32_F32 2775 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2776 Float32Regs:$b, Float32Regs:$a), 2777 (ins Int64Regs:$t, Int64Regs:$s, 2778 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2779 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2780 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2781 []>; 2782 def TEX_CUBE_F32_F32_LEVEL 2783 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2784 Float32Regs:$b, Float32Regs:$a), 2785 (ins Int64Regs:$t, Int64Regs:$s, 2786 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2787 Float32Regs:$lod), 2788 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2789 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2790 []>; 2791 def TEX_CUBE_S32_F32 2792 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2793 Int32Regs:$b, Int32Regs:$a), 2794 (ins Int64Regs:$t, Int64Regs:$s, 2795 
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2796 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2797 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2798 []>; 2799 def TEX_CUBE_S32_F32_LEVEL 2800 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2801 Int32Regs:$b, Int32Regs:$a), 2802 (ins Int64Regs:$t, Int64Regs:$s, 2803 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2804 Float32Regs:$lod), 2805 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2806 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2807 []>; 2808 def TEX_CUBE_U32_F32 2809 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2810 Int32Regs:$b, Int32Regs:$a), 2811 (ins Int64Regs:$t, Int64Regs:$s, 2812 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2813 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2814 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2815 []>; 2816 def TEX_CUBE_U32_F32_LEVEL 2817 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2818 Int32Regs:$b, Int32Regs:$a), 2819 (ins Int64Regs:$t, Int64Regs:$s, 2820 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2821 Float32Regs:$lod), 2822 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2823 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2824 []>; 2825 2826 def TEX_CUBE_ARRAY_F32_F32 2827 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2828 Float32Regs:$b, Float32Regs:$a), 2829 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2830 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2831 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2832 "[$t, $s, \\{$l, $x, $y, $z\\}];", 2833 []>; 2834 def TEX_CUBE_ARRAY_F32_F32_LEVEL 2835 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2836 Float32Regs:$b, Float32Regs:$a), 2837 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2838 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2839 Float32Regs:$lod), 2840 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2841 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 2842 []>; 2843 def TEX_CUBE_ARRAY_S32_F32 2844 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2845 Int32Regs:$b, Int32Regs:$a), 2846 (ins 
Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;

// Remaining tex.acube records (s32 / u32 results).
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// tld4 family: one record per component selector (r/g/b/a) and result
// type; each writes four registers $v0..$v3 from a 2D f32 coordinate.
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t,
Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// tld4 with s32 results (Int32Regs outputs).
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;

// tld4 with u32 results (also Int32Regs outputs; only the mnemonic differs).
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins
Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
}  // closes a block opened earlier in the file


// texmode_unified
// Every record inside this block gets IsTex = 1 and IsTexModeUnified = 1.
let IsTex = 1, IsTexModeUnified = 1 in {
// Texture fetch instructions using handles.
// These records take a single Int64Regs:$t operand (no separate $s).
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def
TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;

// Unified-mode tex.1d, s32 results with f32 coordinate.
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// Unified-mode tex.1d, u32 results.
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// Unified-mode tex.a1d: Int32Regs:$l is printed ahead of the coordinate.
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r,
Int32Regs:$g, 3098 Int32Regs:$b, Int32Regs:$a), 3099 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3100 Float32Regs:$lod), 3101 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3102 "[$t, \\{$l, $x\\}], $lod;", 3103 []>; 3104 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3105 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3106 Int32Regs:$b, Int32Regs:$a), 3107 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3108 Float32Regs:$gradx, Float32Regs:$grady), 3109 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3110 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3111 []>; 3112 def TEX_UNIFIED_1D_ARRAY_U32_S32 3113 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3114 Int32Regs:$b, Int32Regs:$a), 3115 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3116 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3117 "[$t, \\{$l, $x\\}];", 3118 []>; 3119 def TEX_UNIFIED_1D_ARRAY_U32_F32 3120 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3121 Int32Regs:$b, Int32Regs:$a), 3122 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3123 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3124 "[$t, \\{$l, $x\\}];", 3125 []>; 3126 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3127 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3128 Int32Regs:$b, Int32Regs:$a), 3129 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3130 Float32Regs:$lod), 3131 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3132 "[$t, \\{$l, $x\\}], $lod;", 3133 []>; 3134 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3135 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3136 Int32Regs:$b, Int32Regs:$a), 3137 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3138 Float32Regs:$gradx, Float32Regs:$grady), 3139 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3140 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3141 []>; 3142 3143 def TEX_UNIFIED_2D_F32_S32 3144 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3145 Float32Regs:$b, Float32Regs:$a), 3146 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3147 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 
3148 "[$t, \\{$x, $y\\}];", 3149 []>; 3150 def TEX_UNIFIED_2D_F32_F32 3151 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3152 Float32Regs:$b, Float32Regs:$a), 3153 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3154 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3155 "[$t, \\{$x, $y\\}];", 3156 []>; 3157 def TEX_UNIFIED_2D_F32_F32_LEVEL 3158 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3159 Float32Regs:$b, Float32Regs:$a), 3160 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3161 Float32Regs:$lod), 3162 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3163 "[$t, \\{$x, $y\\}], $lod;", 3164 []>; 3165 def TEX_UNIFIED_2D_F32_F32_GRAD 3166 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3167 Float32Regs:$b, Float32Regs:$a), 3168 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3169 Float32Regs:$gradx0, Float32Regs:$gradx1, 3170 Float32Regs:$grady0, Float32Regs:$grady1), 3171 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3172 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3173 "\\{$grady0, $grady1\\};", 3174 []>; 3175 def TEX_UNIFIED_2D_S32_S32 3176 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3177 Int32Regs:$b, Int32Regs:$a), 3178 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3179 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3180 "[$t, \\{$x, $y\\}];", 3181 []>; 3182 def TEX_UNIFIED_2D_S32_F32 3183 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3184 Int32Regs:$b, Int32Regs:$a), 3185 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3186 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3187 "[$t, \\{$x, $y\\}];", 3188 []>; 3189 def TEX_UNIFIED_2D_S32_F32_LEVEL 3190 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3191 Int32Regs:$b, Int32Regs:$a), 3192 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3193 Float32Regs:$lod), 3194 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3195 "[$t, \\{$x, $y\\}], $lod;", 3196 []>; 3197 def TEX_UNIFIED_2D_S32_F32_GRAD 3198 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3199 Int32Regs:$b, Int32Regs:$a), 3200 
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.2d, u32 results.
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.a2d: the asm string prints {$l, $x, $y, $y}, repeating
// $y so the coordinate vector has four entries.
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// Unified-mode tex.a2d, s32 results.
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3301 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3302 Float32Regs:$y, 3303 Float32Regs:$gradx0, Float32Regs:$gradx1, 3304 Float32Regs:$grady0, Float32Regs:$grady1), 3305 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3306 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3307 "\\{$grady0, $grady1\\};", 3308 []>; 3309 def TEX_UNIFIED_2D_ARRAY_U32_S32 3310 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3311 Int32Regs:$b, Int32Regs:$a), 3312 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3313 Int32Regs:$y), 3314 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3315 "[$t, \\{$l, $x, $y, $y\\}];", 3316 []>; 3317 def TEX_UNIFIED_2D_ARRAY_U32_F32 3318 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3319 Int32Regs:$b, Int32Regs:$a), 3320 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3321 Float32Regs:$y), 3322 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3323 "[$t, \\{$l, $x, $y, $y\\}];", 3324 []>; 3325 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 3326 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3327 Int32Regs:$b, Int32Regs:$a), 3328 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3329 Float32Regs:$y, Float32Regs:$lod), 3330 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3331 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3332 []>; 3333 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 3334 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3335 Int32Regs:$b, Int32Regs:$a), 3336 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3337 Float32Regs:$y, 3338 Float32Regs:$gradx0, Float32Regs:$gradx1, 3339 Float32Regs:$grady0, Float32Regs:$grady1), 3340 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3341 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3342 "\\{$grady0, $grady1\\};", 3343 []>; 3344 3345 def TEX_UNIFIED_3D_F32_S32 3346 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3347 Float32Regs:$b, Float32Regs:$a), 3348 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3349 Int32Regs:$z), 3350 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3351 "[$t, \\{$x, $y, $z, $z\\}];", 3352 
[]>;
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.grad.3d: each three-component gradient is printed as four entries
// with the last component repeated.
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
"tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// Unified-mode tex.3d, u32 results.
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// Unified-mode tex.cube: $t plus three f32 coordinates, printed as
// {$x, $y, $z, $z}.
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3507 []>; 3508 3509 def TEX_UNIFIED_CUBE_ARRAY_F32_F32 3510 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3511 Float32Regs:$b, Float32Regs:$a), 3512 (ins Int64Regs:$t, Int32Regs:$l, 3513 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3514 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3515 "[$t, \\{$l, $x, $y, $z\\}];", 3516 []>; 3517 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3518 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3519 Float32Regs:$b, Float32Regs:$a), 3520 (ins Int64Regs:$t, Int32Regs:$l, 3521 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3522 Float32Regs:$lod), 3523 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3524 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3525 []>; 3526 def TEX_UNIFIED_CUBE_ARRAY_S32_F32 3527 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3528 Int32Regs:$b, Int32Regs:$a), 3529 (ins Int64Regs:$t, Int32Regs:$l, 3530 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3531 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3532 "[$t, \\{$l, $x, $y, $z\\}];", 3533 []>; 3534 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3535 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3536 Int32Regs:$b, Int32Regs:$a), 3537 (ins Int64Regs:$t, Int32Regs:$l, 3538 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3539 Float32Regs:$lod), 3540 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3541 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3542 []>; 3543 def TEX_UNIFIED_CUBE_ARRAY_U32_F32 3544 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3545 Int32Regs:$b, Int32Regs:$a), 3546 (ins Int64Regs:$t, Int32Regs:$l, 3547 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3548 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3549 "[$t, \\{$l, $x, $y, $z\\}];", 3550 []>; 3551 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3552 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3553 Int32Regs:$b, Int32Regs:$a), 3554 (ins Int64Regs:$t, Int32Regs:$l, 3555 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3556 Float32Regs:$lod), 3557 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 
3558 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3559 []>; 3560 3561 def TLD4_UNIFIED_R_2D_F32_F32 3562 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3563 Float32Regs:$v2, Float32Regs:$v3), 3564 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3565 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3566 "[$t, \\{$x, $y\\}];", 3567 []>; 3568 def TLD4_UNIFIED_G_2D_F32_F32 3569 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3570 Float32Regs:$v2, Float32Regs:$v3), 3571 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3572 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3573 "[$t, \\{$x, $y\\}];", 3574 []>; 3575 def TLD4_UNIFIED_B_2D_F32_F32 3576 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3577 Float32Regs:$v2, Float32Regs:$v3), 3578 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3579 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3580 "[$t, \\{$x, $y\\}];", 3581 []>; 3582 def TLD4_UNIFIED_A_2D_F32_F32 3583 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3584 Float32Regs:$v2, Float32Regs:$v3), 3585 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3586 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3587 "[$t, \\{$x, $y\\}];", 3588 []>; 3589 def TLD4_UNIFIED_R_2D_S32_F32 3590 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3591 Int32Regs:$v2, Int32Regs:$v3), 3592 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3593 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3594 "[$t, \\{$x, $y\\}];", 3595 []>; 3596 def TLD4_UNIFIED_G_2D_S32_F32 3597 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3598 Int32Regs:$v2, Int32Regs:$v3), 3599 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3600 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3601 "[$t, \\{$x, $y\\}];", 3602 []>; 3603 def TLD4_UNIFIED_B_2D_S32_F32 3604 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3605 Int32Regs:$v2, Int32Regs:$v3), 3606 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3607 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3608 "[$t, 
\\{$x, $y\\}];", 3609 []>; 3610 def TLD4_UNIFIED_A_2D_S32_F32 3611 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3612 Int32Regs:$v2, Int32Regs:$v3), 3613 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3614 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3615 "[$t, \\{$x, $y\\}];", 3616 []>; 3617 def TLD4_UNIFIED_R_2D_U32_F32 3618 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3619 Int32Regs:$v2, Int32Regs:$v3), 3620 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3621 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3622 "[$t, \\{$x, $y\\}];", 3623 []>; 3624 def TLD4_UNIFIED_G_2D_U32_F32 3625 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3626 Int32Regs:$v2, Int32Regs:$v3), 3627 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3628 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3629 "[$t, \\{$x, $y\\}];", 3630 []>; 3631 def TLD4_UNIFIED_B_2D_U32_F32 3632 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3633 Int32Regs:$v2, Int32Regs:$v3), 3634 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3635 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3636 "[$t, \\{$x, $y\\}];", 3637 []>; 3638 def TLD4_UNIFIED_A_2D_U32_F32 3639 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3640 Int32Regs:$v2, Int32Regs:$v3), 3641 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3642 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3643 "[$t, \\{$x, $y\\}];", 3644 []>; 3645 } 3646 3647 3648 3649 //=== Surface load instructions 3650 // .clamp variant 3651 let IsSuld = 1 in { 3652 def SULD_1D_I8_CLAMP 3653 : NVPTXInst<(outs Int16Regs:$r), 3654 (ins Int64Regs:$s, Int32Regs:$x), 3655 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", 3656 []>; 3657 def SULD_1D_I16_CLAMP 3658 : NVPTXInst<(outs Int16Regs:$r), 3659 (ins Int64Regs:$s, Int32Regs:$x), 3660 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", 3661 []>; 3662 def SULD_1D_I32_CLAMP 3663 : NVPTXInst<(outs Int32Regs:$r), 3664 (ins Int64Regs:$s, Int32Regs:$x), 3665 "suld.b.1d.b32.clamp \\{$r\\}, [$s, 
\\{$x\\}];", 3666 []>; 3667 def SULD_1D_I64_CLAMP 3668 : NVPTXInst<(outs Int64Regs:$r), 3669 (ins Int64Regs:$s, Int32Regs:$x), 3670 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", 3671 []>; 3672 3673 def SULD_1D_ARRAY_I8_CLAMP 3674 : NVPTXInst<(outs Int16Regs:$r), 3675 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3676 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3677 []>; 3678 def SULD_1D_ARRAY_I16_CLAMP 3679 : NVPTXInst<(outs Int16Regs:$r), 3680 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3681 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3682 []>; 3683 def SULD_1D_ARRAY_I32_CLAMP 3684 : NVPTXInst<(outs Int32Regs:$r), 3685 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3686 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3687 []>; 3688 def SULD_1D_ARRAY_I64_CLAMP 3689 : NVPTXInst<(outs Int64Regs:$r), 3690 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3691 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3692 []>; 3693 3694 def SULD_2D_I8_CLAMP 3695 : NVPTXInst<(outs Int16Regs:$r), 3696 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3697 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3698 []>; 3699 def SULD_2D_I16_CLAMP 3700 : NVPTXInst<(outs Int16Regs:$r), 3701 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3702 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3703 []>; 3704 def SULD_2D_I32_CLAMP 3705 : NVPTXInst<(outs Int32Regs:$r), 3706 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3707 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3708 []>; 3709 def SULD_2D_I64_CLAMP 3710 : NVPTXInst<(outs Int64Regs:$r), 3711 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3712 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3713 []>; 3714 3715 def SULD_2D_ARRAY_I8_CLAMP 3716 : NVPTXInst<(outs Int16Regs:$r), 3717 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3718 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3719 []>; 3720 def SULD_2D_ARRAY_I16_CLAMP 3721 : NVPTXInst<(outs 
Int16Regs:$r), 3722 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3723 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3724 []>; 3725 def SULD_2D_ARRAY_I32_CLAMP 3726 : NVPTXInst<(outs Int32Regs:$r), 3727 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3728 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3729 []>; 3730 def SULD_2D_ARRAY_I64_CLAMP 3731 : NVPTXInst<(outs Int64Regs:$r), 3732 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3733 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3734 []>; 3735 3736 def SULD_3D_I8_CLAMP 3737 : NVPTXInst<(outs Int16Regs:$r), 3738 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3739 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3740 []>; 3741 def SULD_3D_I16_CLAMP 3742 : NVPTXInst<(outs Int16Regs:$r), 3743 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3744 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3745 []>; 3746 def SULD_3D_I32_CLAMP 3747 : NVPTXInst<(outs Int32Regs:$r), 3748 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3749 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3750 []>; 3751 def SULD_3D_I64_CLAMP 3752 : NVPTXInst<(outs Int64Regs:$r), 3753 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3754 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3755 []>; 3756 } 3757 3758 let IsSuld = 2 in { 3759 def SULD_1D_V2I8_CLAMP 3760 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3761 (ins Int64Regs:$s, Int32Regs:$x), 3762 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3763 []>; 3764 def SULD_1D_V2I16_CLAMP 3765 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3766 (ins Int64Regs:$s, Int32Regs:$x), 3767 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3768 []>; 3769 def SULD_1D_V2I32_CLAMP 3770 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3771 (ins Int64Regs:$s, Int32Regs:$x), 3772 "suld.b.1d.v2.b32.clamp \\{$r, 
$g\\}, [$s, \\{$x\\}];", 3773 []>; 3774 def SULD_1D_V2I64_CLAMP 3775 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3776 (ins Int64Regs:$s, Int32Regs:$x), 3777 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3778 []>; 3779 3780 def SULD_1D_ARRAY_V2I8_CLAMP 3781 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3782 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3783 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3784 []>; 3785 def SULD_1D_ARRAY_V2I16_CLAMP 3786 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3787 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3788 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3789 []>; 3790 def SULD_1D_ARRAY_V2I32_CLAMP 3791 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3792 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3793 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3794 []>; 3795 def SULD_1D_ARRAY_V2I64_CLAMP 3796 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3797 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3798 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3799 []>; 3800 3801 def SULD_2D_V2I8_CLAMP 3802 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3803 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3804 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3805 []>; 3806 def SULD_2D_V2I16_CLAMP 3807 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3808 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3809 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3810 []>; 3811 def SULD_2D_V2I32_CLAMP 3812 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3813 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3814 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3815 []>; 3816 def SULD_2D_V2I64_CLAMP 3817 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3818 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3819 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3820 []>; 3821 3822 def SULD_2D_ARRAY_V2I8_CLAMP 3823 : NVPTXInst<(outs 
Int16Regs:$r, Int16Regs:$g), 3824 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3825 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " 3826 "[$s, \\{$l, $x, $y, $y\\}];", 3827 []>; 3828 def SULD_2D_ARRAY_V2I16_CLAMP 3829 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3830 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3831 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " 3832 "[$s, \\{$l, $x, $y, $y\\}];", 3833 []>; 3834 def SULD_2D_ARRAY_V2I32_CLAMP 3835 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3836 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3837 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " 3838 "[$s, \\{$l, $x, $y, $y\\}];", 3839 []>; 3840 def SULD_2D_ARRAY_V2I64_CLAMP 3841 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3842 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3843 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " 3844 "[$s, \\{$l, $x, $y, $y\\}];", 3845 []>; 3846 3847 def SULD_3D_V2I8_CLAMP 3848 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3849 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3850 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3851 []>; 3852 def SULD_3D_V2I16_CLAMP 3853 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3854 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3855 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3856 []>; 3857 def SULD_3D_V2I32_CLAMP 3858 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3859 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3860 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3861 []>; 3862 def SULD_3D_V2I64_CLAMP 3863 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3864 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3865 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3866 []>; 3867 } 3868 3869 let IsSuld = 3 in { 3870 def SULD_1D_V4I8_CLAMP 3871 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 
3872 (ins Int64Regs:$s, Int32Regs:$x), 3873 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3874 []>; 3875 def SULD_1D_V4I16_CLAMP 3876 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3877 (ins Int64Regs:$s, Int32Regs:$x), 3878 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3879 []>; 3880 def SULD_1D_V4I32_CLAMP 3881 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3882 (ins Int64Regs:$s, Int32Regs:$x), 3883 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3884 []>; 3885 3886 def SULD_1D_ARRAY_V4I8_CLAMP 3887 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3888 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3889 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3890 "[$s, \\{$l, $x\\}];", 3891 []>; 3892 def SULD_1D_ARRAY_V4I16_CLAMP 3893 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3894 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3895 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3896 "[$s, \\{$l, $x\\}];", 3897 []>; 3898 def SULD_1D_ARRAY_V4I32_CLAMP 3899 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3900 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3901 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3902 "[$s, \\{$l, $x\\}];", 3903 []>; 3904 3905 def SULD_2D_V4I8_CLAMP 3906 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3907 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3908 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3909 []>; 3910 def SULD_2D_V4I16_CLAMP 3911 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3912 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3913 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3914 []>; 3915 def SULD_2D_V4I32_CLAMP 3916 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3917 (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y), 3918 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3919 []>; 3920 3921 def SULD_2D_ARRAY_V4I8_CLAMP 3922 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3923 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3924 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3925 "[$s, \\{$l, $x, $y, $y\\}];", 3926 []>; 3927 def SULD_2D_ARRAY_V4I16_CLAMP 3928 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3929 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3930 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3931 "[$s, \\{$l, $x, $y, $y\\}];", 3932 []>; 3933 def SULD_2D_ARRAY_V4I32_CLAMP 3934 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3935 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3936 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3937 "[$s, \\{$l, $x, $y, $y\\}];", 3938 []>; 3939 3940 3941 def SULD_3D_V4I8_CLAMP 3942 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3943 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3944 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3945 "[$s, \\{$x, $y, $z, $z\\}];", 3946 []>; 3947 def SULD_3D_V4I16_CLAMP 3948 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3949 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3950 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3951 "[$s, \\{$x, $y, $z, $z\\}];", 3952 []>; 3953 def SULD_3D_V4I32_CLAMP 3954 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3955 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3956 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3957 "[$s, \\{$x, $y, $z, $z\\}];", 3958 []>; 3959 } 3960 3961 3962 // .trap variant 3963 let IsSuld = 1 in { 3964 def SULD_1D_I8_TRAP 3965 : NVPTXInst<(outs Int16Regs:$r), 3966 (ins Int64Regs:$s, Int32Regs:$x), 3967 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", 
3968 []>; 3969 def SULD_1D_I16_TRAP 3970 : NVPTXInst<(outs Int16Regs:$r), 3971 (ins Int64Regs:$s, Int32Regs:$x), 3972 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", 3973 []>; 3974 def SULD_1D_I32_TRAP 3975 : NVPTXInst<(outs Int32Regs:$r), 3976 (ins Int64Regs:$s, Int32Regs:$x), 3977 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", 3978 []>; 3979 def SULD_1D_I64_TRAP 3980 : NVPTXInst<(outs Int64Regs:$r), 3981 (ins Int64Regs:$s, Int32Regs:$x), 3982 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", 3983 []>; 3984 3985 def SULD_1D_ARRAY_I8_TRAP 3986 : NVPTXInst<(outs Int16Regs:$r), 3987 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3988 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3989 []>; 3990 def SULD_1D_ARRAY_I16_TRAP 3991 : NVPTXInst<(outs Int16Regs:$r), 3992 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3993 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3994 []>; 3995 def SULD_1D_ARRAY_I32_TRAP 3996 : NVPTXInst<(outs Int32Regs:$r), 3997 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3998 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3999 []>; 4000 def SULD_1D_ARRAY_I64_TRAP 4001 : NVPTXInst<(outs Int64Regs:$r), 4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4003 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4004 []>; 4005 4006 def SULD_2D_I8_TRAP 4007 : NVPTXInst<(outs Int16Regs:$r), 4008 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4009 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4010 []>; 4011 def SULD_2D_I16_TRAP 4012 : NVPTXInst<(outs Int16Regs:$r), 4013 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4014 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4015 []>; 4016 def SULD_2D_I32_TRAP 4017 : NVPTXInst<(outs Int32Regs:$r), 4018 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4019 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4020 []>; 4021 def SULD_2D_I64_TRAP 4022 : NVPTXInst<(outs Int64Regs:$r), 4023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4024 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, 
$y\\}];", 4025 []>; 4026 4027 def SULD_2D_ARRAY_I8_TRAP 4028 : NVPTXInst<(outs Int16Regs:$r), 4029 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4030 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4031 []>; 4032 def SULD_2D_ARRAY_I16_TRAP 4033 : NVPTXInst<(outs Int16Regs:$r), 4034 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4035 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4036 []>; 4037 def SULD_2D_ARRAY_I32_TRAP 4038 : NVPTXInst<(outs Int32Regs:$r), 4039 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4040 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4041 []>; 4042 def SULD_2D_ARRAY_I64_TRAP 4043 : NVPTXInst<(outs Int64Regs:$r), 4044 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4045 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4046 []>; 4047 4048 def SULD_3D_I8_TRAP 4049 : NVPTXInst<(outs Int16Regs:$r), 4050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4051 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4052 []>; 4053 def SULD_3D_I16_TRAP 4054 : NVPTXInst<(outs Int16Regs:$r), 4055 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4056 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4057 []>; 4058 def SULD_3D_I32_TRAP 4059 : NVPTXInst<(outs Int32Regs:$r), 4060 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4061 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4062 []>; 4063 def SULD_3D_I64_TRAP 4064 : NVPTXInst<(outs Int64Regs:$r), 4065 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4066 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4067 []>; 4068 } 4069 4070 let IsSuld = 2 in { 4071 def SULD_1D_V2I8_TRAP 4072 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4073 (ins Int64Regs:$s, Int32Regs:$x), 4074 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4075 []>; 4076 def SULD_1D_V2I16_TRAP 4077 : NVPTXInst<(outs Int16Regs:$r, 
Int16Regs:$g), 4078 (ins Int64Regs:$s, Int32Regs:$x), 4079 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4080 []>; 4081 def SULD_1D_V2I32_TRAP 4082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4083 (ins Int64Regs:$s, Int32Regs:$x), 4084 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4085 []>; 4086 def SULD_1D_V2I64_TRAP 4087 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4088 (ins Int64Regs:$s, Int32Regs:$x), 4089 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4090 []>; 4091 4092 def SULD_1D_ARRAY_V2I8_TRAP 4093 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4094 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4095 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4096 []>; 4097 def SULD_1D_ARRAY_V2I16_TRAP 4098 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4099 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4100 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4101 []>; 4102 def SULD_1D_ARRAY_V2I32_TRAP 4103 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4104 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4105 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4106 []>; 4107 def SULD_1D_ARRAY_V2I64_TRAP 4108 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4110 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4111 []>; 4112 4113 def SULD_2D_V2I8_TRAP 4114 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4115 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4116 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4117 []>; 4118 def SULD_2D_V2I16_TRAP 4119 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4120 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4121 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4122 []>; 4123 def SULD_2D_V2I32_TRAP 4124 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4125 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4126 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4127 []>; 4128 def 
SULD_2D_V2I64_TRAP 4129 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4130 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4131 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4132 []>; 4133 4134 def SULD_2D_ARRAY_V2I8_TRAP 4135 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4136 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4137 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " 4138 "[$s, \\{$l, $x, $y, $y\\}];", 4139 []>; 4140 def SULD_2D_ARRAY_V2I16_TRAP 4141 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4142 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4143 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " 4144 "[$s, \\{$l, $x, $y, $y\\}];", 4145 []>; 4146 def SULD_2D_ARRAY_V2I32_TRAP 4147 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4148 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4149 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " 4150 "[$s, \\{$l, $x, $y, $y\\}];", 4151 []>; 4152 def SULD_2D_ARRAY_V2I64_TRAP 4153 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4154 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4155 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " 4156 "[$s, \\{$l, $x, $y, $y\\}];", 4157 []>; 4158 4159 def SULD_3D_V2I8_TRAP 4160 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4161 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4162 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4163 []>; 4164 def SULD_3D_V2I16_TRAP 4165 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4166 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4167 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4168 []>; 4169 def SULD_3D_V2I32_TRAP 4170 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4171 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4172 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4173 []>; 4174 def SULD_3D_V2I64_TRAP 4175 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4176 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z), 4177 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4178 []>; 4179 } 4180 4181 let IsSuld = 3 in { 4182 def SULD_1D_V4I8_TRAP 4183 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4184 (ins Int64Regs:$s, Int32Regs:$x), 4185 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4186 []>; 4187 def SULD_1D_V4I16_TRAP 4188 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4189 (ins Int64Regs:$s, Int32Regs:$x), 4190 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4191 []>; 4192 def SULD_1D_V4I32_TRAP 4193 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4194 (ins Int64Regs:$s, Int32Regs:$x), 4195 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4196 []>; 4197 4198 def SULD_1D_ARRAY_V4I8_TRAP 4199 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4200 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4201 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4202 "[$s, \\{$l, $x\\}];", 4203 []>; 4204 def SULD_1D_ARRAY_V4I16_TRAP 4205 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4206 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4207 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4208 "[$s, \\{$l, $x\\}];", 4209 []>; 4210 def SULD_1D_ARRAY_V4I32_TRAP 4211 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4212 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4213 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4214 "[$s, \\{$l, $x\\}];", 4215 []>; 4216 4217 def SULD_2D_V4I8_TRAP 4218 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4220 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4221 []>; 4222 def SULD_2D_V4I16_TRAP 4223 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4224 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4225 
"suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4226 []>; 4227 def SULD_2D_V4I32_TRAP 4228 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4229 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4230 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4231 []>; 4232 4233 def SULD_2D_ARRAY_V4I8_TRAP 4234 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4235 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4236 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4237 "[$s, \\{$l, $x, $y, $y\\}];", 4238 []>; 4239 def SULD_2D_ARRAY_V4I16_TRAP 4240 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4241 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4242 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4243 "[$s, \\{$l, $x, $y, $y\\}];", 4244 []>; 4245 def SULD_2D_ARRAY_V4I32_TRAP 4246 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4247 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4248 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4249 "[$s, \\{$l, $x, $y, $y\\}];", 4250 []>; 4251 4252 4253 def SULD_3D_V4I8_TRAP 4254 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4255 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4256 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4257 "[$s, \\{$x, $y, $z, $z\\}];", 4258 []>; 4259 def SULD_3D_V4I16_TRAP 4260 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4261 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4262 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4263 "[$s, \\{$x, $y, $z, $z\\}];", 4264 []>; 4265 def SULD_3D_V4I32_TRAP 4266 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4267 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4268 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4269 "[$s, \\{$x, $y, $z, $z\\}];", 4270 []>; 4271 } 
4272 4273 // .zero variant 4274 let IsSuld = 1 in { 4275 def SULD_1D_I8_ZERO 4276 : NVPTXInst<(outs Int16Regs:$r), 4277 (ins Int64Regs:$s, Int32Regs:$x), 4278 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", 4279 []>; 4280 def SULD_1D_I16_ZERO 4281 : NVPTXInst<(outs Int16Regs:$r), 4282 (ins Int64Regs:$s, Int32Regs:$x), 4283 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", 4284 []>; 4285 def SULD_1D_I32_ZERO 4286 : NVPTXInst<(outs Int32Regs:$r), 4287 (ins Int64Regs:$s, Int32Regs:$x), 4288 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", 4289 []>; 4290 def SULD_1D_I64_ZERO 4291 : NVPTXInst<(outs Int64Regs:$r), 4292 (ins Int64Regs:$s, Int32Regs:$x), 4293 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", 4294 []>; 4295 4296 def SULD_1D_ARRAY_I8_ZERO 4297 : NVPTXInst<(outs Int16Regs:$r), 4298 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4299 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4300 []>; 4301 def SULD_1D_ARRAY_I16_ZERO 4302 : NVPTXInst<(outs Int16Regs:$r), 4303 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4304 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4305 []>; 4306 def SULD_1D_ARRAY_I32_ZERO 4307 : NVPTXInst<(outs Int32Regs:$r), 4308 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4309 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4310 []>; 4311 def SULD_1D_ARRAY_I64_ZERO 4312 : NVPTXInst<(outs Int64Regs:$r), 4313 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4314 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4315 []>; 4316 4317 def SULD_2D_I8_ZERO 4318 : NVPTXInst<(outs Int16Regs:$r), 4319 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4320 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4321 []>; 4322 def SULD_2D_I16_ZERO 4323 : NVPTXInst<(outs Int16Regs:$r), 4324 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4325 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4326 []>; 4327 def SULD_2D_I32_ZERO 4328 : NVPTXInst<(outs Int32Regs:$r), 4329 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4330 "suld.b.2d.b32.zero 
\\{$r\\}, [$s, \\{$x, $y\\}];", 4331 []>; 4332 def SULD_2D_I64_ZERO 4333 : NVPTXInst<(outs Int64Regs:$r), 4334 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4335 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4336 []>; 4337 4338 def SULD_2D_ARRAY_I8_ZERO 4339 : NVPTXInst<(outs Int16Regs:$r), 4340 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4341 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4342 []>; 4343 def SULD_2D_ARRAY_I16_ZERO 4344 : NVPTXInst<(outs Int16Regs:$r), 4345 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4346 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4347 []>; 4348 def SULD_2D_ARRAY_I32_ZERO 4349 : NVPTXInst<(outs Int32Regs:$r), 4350 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4351 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4352 []>; 4353 def SULD_2D_ARRAY_I64_ZERO 4354 : NVPTXInst<(outs Int64Regs:$r), 4355 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4356 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4357 []>; 4358 4359 def SULD_3D_I8_ZERO 4360 : NVPTXInst<(outs Int16Regs:$r), 4361 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4362 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4363 []>; 4364 def SULD_3D_I16_ZERO 4365 : NVPTXInst<(outs Int16Regs:$r), 4366 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4367 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4368 []>; 4369 def SULD_3D_I32_ZERO 4370 : NVPTXInst<(outs Int32Regs:$r), 4371 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4372 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4373 []>; 4374 def SULD_3D_I64_ZERO 4375 : NVPTXInst<(outs Int64Regs:$r), 4376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4377 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4378 []>; 4379 } 4380 4381 let IsSuld = 2 in { 4382 def SULD_1D_V2I8_ZERO 4383 : NVPTXInst<(outs 
Int16Regs:$r, Int16Regs:$g), 4384 (ins Int64Regs:$s, Int32Regs:$x), 4385 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4386 []>; 4387 def SULD_1D_V2I16_ZERO 4388 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4389 (ins Int64Regs:$s, Int32Regs:$x), 4390 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4391 []>; 4392 def SULD_1D_V2I32_ZERO 4393 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4394 (ins Int64Regs:$s, Int32Regs:$x), 4395 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4396 []>; 4397 def SULD_1D_V2I64_ZERO 4398 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4399 (ins Int64Regs:$s, Int32Regs:$x), 4400 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4401 []>; 4402 4403 def SULD_1D_ARRAY_V2I8_ZERO 4404 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4405 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4406 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4407 []>; 4408 def SULD_1D_ARRAY_V2I16_ZERO 4409 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4410 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4411 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4412 []>; 4413 def SULD_1D_ARRAY_V2I32_ZERO 4414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4415 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4416 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4417 []>; 4418 def SULD_1D_ARRAY_V2I64_ZERO 4419 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4420 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4421 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4422 []>; 4423 4424 def SULD_2D_V2I8_ZERO 4425 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4426 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4427 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4428 []>; 4429 def SULD_2D_V2I16_ZERO 4430 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4431 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4432 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4433 []>; 4434 def 
SULD_2D_V2I32_ZERO 4435 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4436 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4437 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4438 []>; 4439 def SULD_2D_V2I64_ZERO 4440 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4441 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4442 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4443 []>; 4444 4445 def SULD_2D_ARRAY_V2I8_ZERO 4446 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4447 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4448 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " 4449 "[$s, \\{$l, $x, $y, $y\\}];", 4450 []>; 4451 def SULD_2D_ARRAY_V2I16_ZERO 4452 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4453 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4454 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " 4455 "[$s, \\{$l, $x, $y, $y\\}];", 4456 []>; 4457 def SULD_2D_ARRAY_V2I32_ZERO 4458 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4459 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4460 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " 4461 "[$s, \\{$l, $x, $y, $y\\}];", 4462 []>; 4463 def SULD_2D_ARRAY_V2I64_ZERO 4464 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4465 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4466 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " 4467 "[$s, \\{$l, $x, $y, $y\\}];", 4468 []>; 4469 4470 def SULD_3D_V2I8_ZERO 4471 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4472 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4473 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4474 []>; 4475 def SULD_3D_V2I16_ZERO 4476 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4477 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4478 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4479 []>; 4480 def SULD_3D_V2I32_ZERO 4481 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4482 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4483 
"suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4484 []>; 4485 def SULD_3D_V2I64_ZERO 4486 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4487 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4488 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4489 []>; 4490 } 4491 4492 let IsSuld = 3 in { 4493 def SULD_1D_V4I8_ZERO 4494 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4495 (ins Int64Regs:$s, Int32Regs:$x), 4496 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4497 []>; 4498 def SULD_1D_V4I16_ZERO 4499 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4500 (ins Int64Regs:$s, Int32Regs:$x), 4501 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4502 []>; 4503 def SULD_1D_V4I32_ZERO 4504 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4505 (ins Int64Regs:$s, Int32Regs:$x), 4506 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4507 []>; 4508 4509 def SULD_1D_ARRAY_V4I8_ZERO 4510 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4511 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4512 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4513 "[$s, \\{$l, $x\\}];", 4514 []>; 4515 def SULD_1D_ARRAY_V4I16_ZERO 4516 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4517 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4518 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4519 "[$s, \\{$l, $x\\}];", 4520 []>; 4521 def SULD_1D_ARRAY_V4I32_ZERO 4522 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4523 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4524 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4525 "[$s, \\{$l, $x\\}];", 4526 []>; 4527 4528 def SULD_2D_V4I8_ZERO 4529 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4531 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, 
$a\\}, [$s, \\{$x, $y\\}];", 4532 []>; 4533 def SULD_2D_V4I16_ZERO 4534 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4535 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4536 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4537 []>; 4538 def SULD_2D_V4I32_ZERO 4539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4540 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4541 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4542 []>; 4543 4544 def SULD_2D_ARRAY_V4I8_ZERO 4545 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4546 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4547 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4548 "[$s, \\{$l, $x, $y, $y\\}];", 4549 []>; 4550 def SULD_2D_ARRAY_V4I16_ZERO 4551 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4552 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4553 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4554 "[$s, \\{$l, $x, $y, $y\\}];", 4555 []>; 4556 def SULD_2D_ARRAY_V4I32_ZERO 4557 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4558 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4559 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4560 "[$s, \\{$l, $x, $y, $y\\}];", 4561 []>; 4562 4563 4564 def SULD_3D_V4I8_ZERO 4565 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4566 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4567 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4568 "[$s, \\{$x, $y, $z, $z\\}];", 4569 []>; 4570 def SULD_3D_V4I16_ZERO 4571 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4572 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4573 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4574 "[$s, \\{$x, $y, $z, $z\\}];", 4575 []>; 4576 def SULD_3D_V4I32_ZERO 4577 : NVPTXInst<(outs Int32Regs:$r, 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4578 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4579 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4580 "[$s, \\{$x, $y, $z, $z\\}];", 4581 []>; 4582 } 4583 4584 //----------------------------------- 4585 // Texture Query Intrinsics 4586 //----------------------------------- 4587 4588 /* TXQ_* records: each defines an Int32Regs result $d from an Int64Regs operand $a and prints txq.<attribute>.b32 with $d and a bracketed [$a]. Every record in this group gets IsSurfTexQuery = 1 from the enclosing let. The pattern lists are empty ([]); the anonymous Pat records after the group map the int_nvvm_txq_* intrinsics onto these records. */ let IsSurfTexQuery = 1 in { 4589 def TXQ_CHANNEL_ORDER 4590 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4591 "txq.channel_order.b32 \t$d, [$a];", 4592 []>; 4593 def TXQ_CHANNEL_DATA_TYPE 4594 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4595 "txq.channel_data_type.b32 \t$d, [$a];", 4596 []>; 4597 def TXQ_WIDTH 4598 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4599 "txq.width.b32 \t$d, [$a];", 4600 []>; 4601 def TXQ_HEIGHT 4602 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4603 "txq.height.b32 \t$d, [$a];", 4604 []>; 4605 def TXQ_DEPTH 4606 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4607 "txq.depth.b32 \t$d, [$a];", 4608 []>; 4609 def TXQ_ARRAY_SIZE 4610 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4611 "txq.array_size.b32 \t$d, [$a];", 4612 []>; 4613 def TXQ_NUM_SAMPLES 4614 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4615 "txq.num_samples.b32 \t$d, [$a];", 4616 []>; 4617 def TXQ_NUM_MIPMAP_LEVELS 4618 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4619 "txq.num_mipmap_levels.b32 \t$d, [$a];", 4620 []>; 4621 } 4622 4623 /* Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs value is rewritten to the TXQ_* record for the same attribute. */ def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), 4624 (TXQ_CHANNEL_ORDER Int64Regs:$a)>; 4625 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), 4626 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; 4627 def : Pat<(int_nvvm_txq_width Int64Regs:$a), 4628 (TXQ_WIDTH Int64Regs:$a)>; 4629 def : Pat<(int_nvvm_txq_height Int64Regs:$a), 4630 (TXQ_HEIGHT Int64Regs:$a)>; 4631 def : Pat<(int_nvvm_txq_depth Int64Regs:$a), 4632 (TXQ_DEPTH Int64Regs:$a)>; 4633 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), 4634 (TXQ_ARRAY_SIZE Int64Regs:$a)>; 4635 def : 
Pat<(int_nvvm_txq_num_samples Int64Regs:$a), 4636 (TXQ_NUM_SAMPLES Int64Regs:$a)>; 4637 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), 4638 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; 4639 4640 4641 //----------------------------------- 4642 // Surface Query Intrinsics 4643 //----------------------------------- 4644 4645 /* SUQ_* records: each defines an Int32Regs result $d from an Int64Regs operand $a and prints suq.<attribute>.b32 with $d and a bracketed [$a]. Six attributes are defined here: channel_order, channel_data_type, width, height, depth and array_size. Every record gets IsSurfTexQuery = 1 from the enclosing let. The pattern lists are empty ([]); the anonymous Pat records after the group map the int_nvvm_suq_* intrinsics onto these records. */ let IsSurfTexQuery = 1 in { 4646 def SUQ_CHANNEL_ORDER 4647 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4648 "suq.channel_order.b32 \t$d, [$a];", 4649 []>; 4650 def SUQ_CHANNEL_DATA_TYPE 4651 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4652 "suq.channel_data_type.b32 \t$d, [$a];", 4653 []>; 4654 def SUQ_WIDTH 4655 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4656 "suq.width.b32 \t$d, [$a];", 4657 []>; 4658 def SUQ_HEIGHT 4659 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4660 "suq.height.b32 \t$d, [$a];", 4661 []>; 4662 def SUQ_DEPTH 4663 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4664 "suq.depth.b32 \t$d, [$a];", 4665 []>; 4666 def SUQ_ARRAY_SIZE 4667 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4668 "suq.array_size.b32 \t$d, [$a];", 4669 []>; 4670 } 4671 4672 /* Selection patterns: each int_nvvm_suq_* intrinsic applied to an Int64Regs value is rewritten to the SUQ_* record for the same attribute. */ def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), 4673 (SUQ_CHANNEL_ORDER Int64Regs:$a)>; 4674 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), 4675 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; 4676 def : Pat<(int_nvvm_suq_width Int64Regs:$a), 4677 (SUQ_WIDTH Int64Regs:$a)>; 4678 def : Pat<(int_nvvm_suq_height Int64Regs:$a), 4679 (SUQ_HEIGHT Int64Regs:$a)>; 4680 def : Pat<(int_nvvm_suq_depth Int64Regs:$a), 4681 (SUQ_DEPTH Int64Regs:$a)>; 4682 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), 4683 (SUQ_ARRAY_SIZE Int64Regs:$a)>; 4684 4685 4686 //===- Handle Query -------------------------------------------------------===// 4687 4688 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work 4689 /* ISTYPEP_* records: each defines an Int1Regs result $d from an Int64Regs operand $a and prints istypep.samplerref, istypep.surfref or istypep.texref with $d and $a (no brackets around $a, unlike the txq and suq records). The matching int_nvvm_istypep_* intrinsic is attached directly in each record's pattern list rather than through a separate Pat. */ def ISTYPEP_SAMPLER 4690 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4691 "istypep.samplerref \t$d, $a;", 4692 
[(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; 4693 /* ISTYPEP_SURFACE and ISTYPEP_TEXTURE have the same shape as ISTYPEP_SAMPLER: an Int64Regs operand $a in, an Int1Regs result $d out, with the int_nvvm_istypep_* intrinsic matched inline by the (set ...) pattern. */ def ISTYPEP_SURFACE 4694 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4695 "istypep.surfref \t$d, $a;", 4696 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; 4697 def ISTYPEP_TEXTURE 4698 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4699 "istypep.texref \t$d, $a;", 4700 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; 4701 4702 //===- Surface Stores -----------------------------------------------------===// 4703 4704 let IsSust = 1 in { 4705 // Unformatted 4706 // .clamp variant 4707 /* SUST_B_* records: no outputs; the operands are an Int64Regs $s, the Int32Regs coordinate(s), then one value register per vector element ($r, or $r and $g for the v2 forms seen here). The b8 and b16 forms both take Int16Regs values, b32 takes Int32Regs and b64 takes Int64Regs. All of them get IsSust = 1 from the enclosing let and have empty pattern lists; how they are selected is not visible in this part of the file. */ def SUST_B_1D_B8_CLAMP 4708 : NVPTXInst<(outs), 4709 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4710 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4711 []>; 4712 def SUST_B_1D_B16_CLAMP 4713 : NVPTXInst<(outs), 4714 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4715 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4716 []>; 4717 def SUST_B_1D_B32_CLAMP 4718 : NVPTXInst<(outs), 4719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 4720 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4721 []>; 4722 def SUST_B_1D_B64_CLAMP 4723 : NVPTXInst<(outs), 4724 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 4725 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4726 []>; 4727 def SUST_B_1D_V2B8_CLAMP 4728 : NVPTXInst<(outs), 4729 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4730 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4731 []>; 4732 def SUST_B_1D_V2B16_CLAMP 4733 : NVPTXInst<(outs), 4734 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4735 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4736 []>; 4737 def SUST_B_1D_V2B32_CLAMP 4738 : NVPTXInst<(outs), 4739 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4740 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4741 []>; 4742 def SUST_B_1D_V2B64_CLAMP 4743 : NVPTXInst<(outs), 4744 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4745 
"sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4746 []>; 4747 def SUST_B_1D_V4B8_CLAMP 4748 : NVPTXInst<(outs), 4749 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4750 Int16Regs:$b, Int16Regs:$a), 4751 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4752 []>; 4753 def SUST_B_1D_V4B16_CLAMP 4754 : NVPTXInst<(outs), 4755 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4756 Int16Regs:$b, Int16Regs:$a), 4757 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4758 []>; 4759 def SUST_B_1D_V4B32_CLAMP 4760 : NVPTXInst<(outs), 4761 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 4762 Int32Regs:$b, Int32Regs:$a), 4763 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4764 []>; 4765 4766 4767 def SUST_B_1D_ARRAY_B8_CLAMP 4768 : NVPTXInst<(outs), 4769 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4770 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4771 []>; 4772 def SUST_B_1D_ARRAY_B16_CLAMP 4773 : NVPTXInst<(outs), 4774 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4775 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4776 []>; 4777 def SUST_B_1D_ARRAY_B32_CLAMP 4778 : NVPTXInst<(outs), 4779 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 4780 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4781 []>; 4782 def SUST_B_1D_ARRAY_B64_CLAMP 4783 : NVPTXInst<(outs), 4784 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 4785 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4786 []>; 4787 def SUST_B_1D_ARRAY_V2B8_CLAMP 4788 : NVPTXInst<(outs), 4789 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4790 Int16Regs:$g), 4791 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4792 []>; 4793 def SUST_B_1D_ARRAY_V2B16_CLAMP 4794 : NVPTXInst<(outs), 4795 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4796 Int16Regs:$g), 4797 
"sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4798 []>; 4799 def SUST_B_1D_ARRAY_V2B32_CLAMP 4800 : NVPTXInst<(outs), 4801 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4802 Int32Regs:$g), 4803 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4804 []>; 4805 def SUST_B_1D_ARRAY_V2B64_CLAMP 4806 : NVPTXInst<(outs), 4807 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 4808 Int64Regs:$g), 4809 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4810 []>; 4811 def SUST_B_1D_ARRAY_V4B8_CLAMP 4812 : NVPTXInst<(outs), 4813 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4814 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4815 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " 4816 "\\{$r, $g, $b, $a\\};", 4817 []>; 4818 def SUST_B_1D_ARRAY_V4B16_CLAMP 4819 : NVPTXInst<(outs), 4820 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4821 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4822 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " 4823 "\\{$r, $g, $b, $a\\};", 4824 []>; 4825 def SUST_B_1D_ARRAY_V4B32_CLAMP 4826 : NVPTXInst<(outs), 4827 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4828 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4829 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " 4830 "\\{$r, $g, $b, $a\\};", 4831 []>; 4832 4833 4834 def SUST_B_2D_B8_CLAMP 4835 : NVPTXInst<(outs), 4836 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4837 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4838 []>; 4839 def SUST_B_2D_B16_CLAMP 4840 : NVPTXInst<(outs), 4841 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4842 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4843 []>; 4844 def SUST_B_2D_B32_CLAMP 4845 : NVPTXInst<(outs), 4846 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4847 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4848 []>; 4849 def SUST_B_2D_B64_CLAMP 4850 : NVPTXInst<(outs), 4851 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 4852 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4853 []>; 4854 def SUST_B_2D_V2B8_CLAMP 4855 : NVPTXInst<(outs), 4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4857 Int16Regs:$g), 4858 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4859 []>; 4860 def SUST_B_2D_V2B16_CLAMP 4861 : NVPTXInst<(outs), 4862 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4863 Int16Regs:$g), 4864 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4865 []>; 4866 def SUST_B_2D_V2B32_CLAMP 4867 : NVPTXInst<(outs), 4868 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4869 Int32Regs:$g), 4870 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4871 []>; 4872 def SUST_B_2D_V2B64_CLAMP 4873 : NVPTXInst<(outs), 4874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 4875 Int64Regs:$g), 4876 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4877 []>; 4878 def SUST_B_2D_V4B8_CLAMP 4879 : NVPTXInst<(outs), 4880 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4881 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4882 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " 4883 "\\{$r, $g, $b, $a\\};", 4884 []>; 4885 def SUST_B_2D_V4B16_CLAMP 4886 : NVPTXInst<(outs), 4887 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4888 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4889 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " 4890 "\\{$r, $g, $b, $a\\};", 4891 []>; 4892 def SUST_B_2D_V4B32_CLAMP 4893 : NVPTXInst<(outs), 4894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4895 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4896 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " 4897 "\\{$r, $g, $b, $a\\};", 4898 []>; 4899 4900 4901 def SUST_B_2D_ARRAY_B8_CLAMP 4902 : NVPTXInst<(outs), 4903 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4904 Int16Regs:$r), 4905 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, 
$y\\}], \\{$r\\};", 4906 []>; 4907 def SUST_B_2D_ARRAY_B16_CLAMP 4908 : NVPTXInst<(outs), 4909 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4910 Int16Regs:$r), 4911 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4912 []>; 4913 def SUST_B_2D_ARRAY_B32_CLAMP 4914 : NVPTXInst<(outs), 4915 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4916 Int32Regs:$r), 4917 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4918 []>; 4919 def SUST_B_2D_ARRAY_B64_CLAMP 4920 : NVPTXInst<(outs), 4921 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4922 Int64Regs:$r), 4923 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4924 []>; 4925 def SUST_B_2D_ARRAY_V2B8_CLAMP 4926 : NVPTXInst<(outs), 4927 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4928 Int16Regs:$r, Int16Regs:$g), 4929 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4930 "\\{$r, $g\\};", 4931 []>; 4932 def SUST_B_2D_ARRAY_V2B16_CLAMP 4933 : NVPTXInst<(outs), 4934 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4935 Int16Regs:$r, Int16Regs:$g), 4936 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4937 "\\{$r, $g\\};", 4938 []>; 4939 def SUST_B_2D_ARRAY_V2B32_CLAMP 4940 : NVPTXInst<(outs), 4941 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4942 Int32Regs:$r, Int32Regs:$g), 4943 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4944 "\\{$r, $g\\};", 4945 []>; 4946 def SUST_B_2D_ARRAY_V2B64_CLAMP 4947 : NVPTXInst<(outs), 4948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4949 Int64Regs:$r, Int64Regs:$g), 4950 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4951 "\\{$r, $g\\};", 4952 []>; 4953 def SUST_B_2D_ARRAY_V4B8_CLAMP 4954 : NVPTXInst<(outs), 4955 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4956 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4957 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, 
$y\\}], " 4958 "\\{$r, $g, $b, $a\\};", 4959 []>; 4960 def SUST_B_2D_ARRAY_V4B16_CLAMP 4961 : NVPTXInst<(outs), 4962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4964 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4965 "\\{$r, $g, $b, $a\\};", 4966 []>; 4967 def SUST_B_2D_ARRAY_V4B32_CLAMP 4968 : NVPTXInst<(outs), 4969 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4970 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4971 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4972 "\\{$r, $g, $b, $a\\};", 4973 []>; 4974 4975 4976 def SUST_B_3D_B8_CLAMP 4977 : NVPTXInst<(outs), 4978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4979 Int16Regs:$r), 4980 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4981 []>; 4982 def SUST_B_3D_B16_CLAMP 4983 : NVPTXInst<(outs), 4984 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4985 Int16Regs:$r), 4986 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4987 []>; 4988 def SUST_B_3D_B32_CLAMP 4989 : NVPTXInst<(outs), 4990 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4991 Int32Regs:$r), 4992 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4993 []>; 4994 def SUST_B_3D_B64_CLAMP 4995 : NVPTXInst<(outs), 4996 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4997 Int64Regs:$r), 4998 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4999 []>; 5000 def SUST_B_3D_V2B8_CLAMP 5001 : NVPTXInst<(outs), 5002 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5003 Int16Regs:$r, Int16Regs:$g), 5004 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5005 "\\{$r, $g\\};", 5006 []>; 5007 def SUST_B_3D_V2B16_CLAMP 5008 : NVPTXInst<(outs), 5009 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5010 Int16Regs:$r, Int16Regs:$g), 5011 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5012 "\\{$r, 
$g\\};", 5013 []>; 5014 def SUST_B_3D_V2B32_CLAMP 5015 : NVPTXInst<(outs), 5016 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5017 Int32Regs:$r, Int32Regs:$g), 5018 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5019 "\\{$r, $g\\};", 5020 []>; 5021 def SUST_B_3D_V2B64_CLAMP 5022 : NVPTXInst<(outs), 5023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5024 Int64Regs:$r, Int64Regs:$g), 5025 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5026 "\\{$r, $g\\};", 5027 []>; 5028 def SUST_B_3D_V4B8_CLAMP 5029 : NVPTXInst<(outs), 5030 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5031 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5032 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5033 "\\{$r, $g, $b, $a\\};", 5034 []>; 5035 def SUST_B_3D_V4B16_CLAMP 5036 : NVPTXInst<(outs), 5037 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5038 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5039 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5040 "\\{$r, $g, $b, $a\\};", 5041 []>; 5042 def SUST_B_3D_V4B32_CLAMP 5043 : NVPTXInst<(outs), 5044 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5045 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5046 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5047 "\\{$r, $g, $b, $a\\};", 5048 []>; 5049 5050 5051 // .trap variant 5052 def SUST_B_1D_B8_TRAP 5053 : NVPTXInst<(outs), 5054 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5055 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5056 []>; 5057 def SUST_B_1D_B16_TRAP 5058 : NVPTXInst<(outs), 5059 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5060 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5061 []>; 5062 def SUST_B_1D_B32_TRAP 5063 : NVPTXInst<(outs), 5064 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5065 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5066 []>; 5067 def SUST_B_1D_B64_TRAP 5068 : NVPTXInst<(outs), 5069 (ins Int64Regs:$s, 
Int32Regs:$x, Int64Regs:$r), 5070 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", 5071 []>; 5072 def SUST_B_1D_V2B8_TRAP 5073 : NVPTXInst<(outs), 5074 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5075 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5076 []>; 5077 def SUST_B_1D_V2B16_TRAP 5078 : NVPTXInst<(outs), 5079 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5080 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5081 []>; 5082 def SUST_B_1D_V2B32_TRAP 5083 : NVPTXInst<(outs), 5084 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5085 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5086 []>; 5087 def SUST_B_1D_V2B64_TRAP 5088 : NVPTXInst<(outs), 5089 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5090 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5091 []>; 5092 def SUST_B_1D_V4B8_TRAP 5093 : NVPTXInst<(outs), 5094 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5095 Int16Regs:$b, Int16Regs:$a), 5096 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5097 []>; 5098 def SUST_B_1D_V4B16_TRAP 5099 : NVPTXInst<(outs), 5100 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5101 Int16Regs:$b, Int16Regs:$a), 5102 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5103 []>; 5104 def SUST_B_1D_V4B32_TRAP 5105 : NVPTXInst<(outs), 5106 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5107 Int32Regs:$b, Int32Regs:$a), 5108 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5109 []>; 5110 5111 5112 def SUST_B_1D_ARRAY_B8_TRAP 5113 : NVPTXInst<(outs), 5114 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5115 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5116 []>; 5117 def SUST_B_1D_ARRAY_B16_TRAP 5118 : NVPTXInst<(outs), 5119 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5120 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5121 []>; 5122 def 
SUST_B_1D_ARRAY_B32_TRAP 5123 : NVPTXInst<(outs), 5124 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5125 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5126 []>; 5127 def SUST_B_1D_ARRAY_B64_TRAP 5128 : NVPTXInst<(outs), 5129 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5130 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5131 []>; 5132 def SUST_B_1D_ARRAY_V2B8_TRAP 5133 : NVPTXInst<(outs), 5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5135 Int16Regs:$g), 5136 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5137 []>; 5138 def SUST_B_1D_ARRAY_V2B16_TRAP 5139 : NVPTXInst<(outs), 5140 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5141 Int16Regs:$g), 5142 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5143 []>; 5144 def SUST_B_1D_ARRAY_V2B32_TRAP 5145 : NVPTXInst<(outs), 5146 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5147 Int32Regs:$g), 5148 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5149 []>; 5150 def SUST_B_1D_ARRAY_V2B64_TRAP 5151 : NVPTXInst<(outs), 5152 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5153 Int64Regs:$g), 5154 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5155 []>; 5156 def SUST_B_1D_ARRAY_V4B8_TRAP 5157 : NVPTXInst<(outs), 5158 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5159 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5160 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5161 "\\{$r, $g, $b, $a\\};", 5162 []>; 5163 def SUST_B_1D_ARRAY_V4B16_TRAP 5164 : NVPTXInst<(outs), 5165 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5166 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5167 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5168 "\\{$r, $g, $b, $a\\};", 5169 []>; 5170 def SUST_B_1D_ARRAY_V4B32_TRAP 5171 : NVPTXInst<(outs), 5172 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5173 Int32Regs:$g, 
Int32Regs:$b, Int32Regs:$a), 5174 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5175 "\\{$r, $g, $b, $a\\};", 5176 []>; 5177 5178 5179 def SUST_B_2D_B8_TRAP 5180 : NVPTXInst<(outs), 5181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5182 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5183 []>; 5184 def SUST_B_2D_B16_TRAP 5185 : NVPTXInst<(outs), 5186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5187 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5188 []>; 5189 def SUST_B_2D_B32_TRAP 5190 : NVPTXInst<(outs), 5191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5192 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5193 []>; 5194 def SUST_B_2D_B64_TRAP 5195 : NVPTXInst<(outs), 5196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5197 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5198 []>; 5199 def SUST_B_2D_V2B8_TRAP 5200 : NVPTXInst<(outs), 5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5202 Int16Regs:$g), 5203 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5204 []>; 5205 def SUST_B_2D_V2B16_TRAP 5206 : NVPTXInst<(outs), 5207 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5208 Int16Regs:$g), 5209 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5210 []>; 5211 def SUST_B_2D_V2B32_TRAP 5212 : NVPTXInst<(outs), 5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5214 Int32Regs:$g), 5215 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5216 []>; 5217 def SUST_B_2D_V2B64_TRAP 5218 : NVPTXInst<(outs), 5219 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5220 Int64Regs:$g), 5221 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5222 []>; 5223 def SUST_B_2D_V4B8_TRAP 5224 : NVPTXInst<(outs), 5225 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5226 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5227 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5228 "\\{$r, $g, $b, 
$a\\};", 5229 []>; 5230 def SUST_B_2D_V4B16_TRAP 5231 : NVPTXInst<(outs), 5232 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5233 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5234 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5235 "\\{$r, $g, $b, $a\\};", 5236 []>; 5237 def SUST_B_2D_V4B32_TRAP 5238 : NVPTXInst<(outs), 5239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5240 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5241 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5242 "\\{$r, $g, $b, $a\\};", 5243 []>; 5244 5245 5246 def SUST_B_2D_ARRAY_B8_TRAP 5247 : NVPTXInst<(outs), 5248 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5249 Int16Regs:$r), 5250 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5251 []>; 5252 def SUST_B_2D_ARRAY_B16_TRAP 5253 : NVPTXInst<(outs), 5254 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5255 Int16Regs:$r), 5256 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5257 []>; 5258 def SUST_B_2D_ARRAY_B32_TRAP 5259 : NVPTXInst<(outs), 5260 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5261 Int32Regs:$r), 5262 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5263 []>; 5264 def SUST_B_2D_ARRAY_B64_TRAP 5265 : NVPTXInst<(outs), 5266 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5267 Int64Regs:$r), 5268 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5269 []>; 5270 def SUST_B_2D_ARRAY_V2B8_TRAP 5271 : NVPTXInst<(outs), 5272 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5273 Int16Regs:$r, Int16Regs:$g), 5274 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5275 "\\{$r, $g\\};", 5276 []>; 5277 def SUST_B_2D_ARRAY_V2B16_TRAP 5278 : NVPTXInst<(outs), 5279 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5280 Int16Regs:$r, Int16Regs:$g), 5281 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5282 "\\{$r, $g\\};", 5283 []>; 5284 def 
SUST_B_2D_ARRAY_V2B32_TRAP 5285 : NVPTXInst<(outs), 5286 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5287 Int32Regs:$r, Int32Regs:$g), 5288 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5289 "\\{$r, $g\\};", 5290 []>; 5291 def SUST_B_2D_ARRAY_V2B64_TRAP 5292 : NVPTXInst<(outs), 5293 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5294 Int64Regs:$r, Int64Regs:$g), 5295 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5296 "\\{$r, $g\\};", 5297 []>; 5298 def SUST_B_2D_ARRAY_V4B8_TRAP 5299 : NVPTXInst<(outs), 5300 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5301 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5302 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5303 "\\{$r, $g, $b, $a\\};", 5304 []>; 5305 def SUST_B_2D_ARRAY_V4B16_TRAP 5306 : NVPTXInst<(outs), 5307 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5308 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5309 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5310 "\\{$r, $g, $b, $a\\};", 5311 []>; 5312 def SUST_B_2D_ARRAY_V4B32_TRAP 5313 : NVPTXInst<(outs), 5314 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5315 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5316 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5317 "\\{$r, $g, $b, $a\\};", 5318 []>; 5319 5320 5321 def SUST_B_3D_B8_TRAP 5322 : NVPTXInst<(outs), 5323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5324 Int16Regs:$r), 5325 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5326 []>; 5327 def SUST_B_3D_B16_TRAP 5328 : NVPTXInst<(outs), 5329 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5330 Int16Regs:$r), 5331 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5332 []>; 5333 def SUST_B_3D_B32_TRAP 5334 : NVPTXInst<(outs), 5335 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5336 Int32Regs:$r), 5337 "sust.b.3d.b32.trap \t[$s, 
\\{$x, $y, $z, $z\\}], \\{$r\\};", 5338 []>; 5339 def SUST_B_3D_B64_TRAP 5340 : NVPTXInst<(outs), 5341 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5342 Int64Regs:$r), 5343 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5344 []>; 5345 def SUST_B_3D_V2B8_TRAP 5346 : NVPTXInst<(outs), 5347 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5348 Int16Regs:$r, Int16Regs:$g), 5349 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5350 "\\{$r, $g\\};", 5351 []>; 5352 def SUST_B_3D_V2B16_TRAP 5353 : NVPTXInst<(outs), 5354 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5355 Int16Regs:$r, Int16Regs:$g), 5356 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5357 "\\{$r, $g\\};", 5358 []>; 5359 def SUST_B_3D_V2B32_TRAP 5360 : NVPTXInst<(outs), 5361 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5362 Int32Regs:$r, Int32Regs:$g), 5363 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5364 "\\{$r, $g\\};", 5365 []>; 5366 def SUST_B_3D_V2B64_TRAP 5367 : NVPTXInst<(outs), 5368 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5369 Int64Regs:$r, Int64Regs:$g), 5370 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5371 "\\{$r, $g\\};", 5372 []>; 5373 def SUST_B_3D_V4B8_TRAP 5374 : NVPTXInst<(outs), 5375 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5376 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5377 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5378 "\\{$r, $g, $b, $a\\};", 5379 []>; 5380 def SUST_B_3D_V4B16_TRAP 5381 : NVPTXInst<(outs), 5382 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5383 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5384 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5385 "\\{$r, $g, $b, $a\\};", 5386 []>; 5387 def SUST_B_3D_V4B32_TRAP 5388 : NVPTXInst<(outs), 5389 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5390 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5391 
"sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5392 "\\{$r, $g, $b, $a\\};", 5393 []>; 5394 5395 5396 // .zero variant 5397 def SUST_B_1D_B8_ZERO 5398 : NVPTXInst<(outs), 5399 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5400 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", 5401 []>; 5402 def SUST_B_1D_B16_ZERO 5403 : NVPTXInst<(outs), 5404 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5405 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", 5406 []>; 5407 def SUST_B_1D_B32_ZERO 5408 : NVPTXInst<(outs), 5409 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5410 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", 5411 []>; 5412 def SUST_B_1D_B64_ZERO 5413 : NVPTXInst<(outs), 5414 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5415 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", 5416 []>; 5417 def SUST_B_1D_V2B8_ZERO 5418 : NVPTXInst<(outs), 5419 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5420 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5421 []>; 5422 def SUST_B_1D_V2B16_ZERO 5423 : NVPTXInst<(outs), 5424 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5425 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5426 []>; 5427 def SUST_B_1D_V2B32_ZERO 5428 : NVPTXInst<(outs), 5429 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5430 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5431 []>; 5432 def SUST_B_1D_V2B64_ZERO 5433 : NVPTXInst<(outs), 5434 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5435 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5436 []>; 5437 def SUST_B_1D_V4B8_ZERO 5438 : NVPTXInst<(outs), 5439 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5440 Int16Regs:$b, Int16Regs:$a), 5441 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5442 []>; 5443 def SUST_B_1D_V4B16_ZERO 5444 : NVPTXInst<(outs), 5445 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5446 Int16Regs:$b, Int16Regs:$a), 5447 "sust.b.1d.v4.b16.zero 
\t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;

// sust.b ... .zero records: 1d (last entry), 1d array, 2d, 2d array, 3d.
//
// Operand naming used throughout: $s is the surface operand (Int64Regs),
// $idx/$x/$y/$z are 32-bit coordinate operands, and $r/$g/$b/$a are the data
// operands.  8-bit and 16-bit data both use Int16Regs.  Every record has an
// empty pattern list.

def SUST_B_1D_V4B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;

// --- sust.b.a1d ... .zero ---

def SUST_B_1D_ARRAY_B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
  "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
  "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_B_1D_ARRAY_V2B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V2B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_B_1D_ARRAY_V4B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_ARRAY_V4B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_1D_ARRAY_V4B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.b.2d ... .zero ---

def SUST_B_2D_B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_V2B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V2B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_B_2D_V4B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_V4B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_V4B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.b.a2d ... .zero ---
// The coordinate list printed here has four entries: $y appears twice.
// NOTE(review): presumably this pads the list to the four-element coordinate
// vector PTX expects for a2d surfaces -- confirm against the PTX ISA.

def SUST_B_2D_ARRAY_B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r),
  "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r),
  "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r),
  "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int64Regs:$r),
  "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_B_2D_ARRAY_V2B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V2B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_2D_ARRAY_V4B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_ARRAY_V4B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_2D_ARRAY_V4B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.b.3d ... .zero ---
// As with a2d, the printed coordinate list has four entries ($z twice).

def SUST_B_3D_B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r),
  "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r),
  "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r),
  "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int64Regs:$r),
  "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_B_3D_V2B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g),
  "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_3D_V2B64_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int64Regs:$r, Int64Regs:$g),
  "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_B_3D_V4B8_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_3D_V4B16_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_B_3D_V4B32_ZERO : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;


// Formatted stores: sust.p ... .trap records.
// Operand conventions are the same as for the sust.b records; only b8, b16
// and b32 element widths are defined in this group.

// --- sust.p.1d ... .trap ---

def SUST_P_1D_B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_V2B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_V2B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_V2B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
  []>;

// --- sust.p.a1d ... .trap ---

def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
  "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
  "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
  []>;
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g),
  "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
  []>;
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.p.2d ... .trap ---

def SUST_P_2D_B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
  []>;
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.p.a2d ... .trap ---
// Four-entry coordinate list again: $y is printed twice.

def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r),
  "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r),
  "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r),
  "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
  []>;
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g),
  "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;

// --- sust.p.3d ... .trap ---
// Four-entry coordinate list: $z is printed twice.

def SUST_P_3D_B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r),
  "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_P_3D_B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r),
  "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_P_3D_B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r),
  "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
  []>;
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
// Remaining sust.p.3d ... .trap records.  $s is the surface operand,
// $x/$y/$z the 32-bit coordinates ($z is printed twice in the coordinate
// list), and $r/$g/$b/$a the data operands.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g),
  "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g),
  "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g\\};",
  []>;
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
  (outs),
  (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
       Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
  "\\{$r, $g, $b, $a\\};",
  []>;
}

// Selection patterns for the surface store intrinsics.
//
// Each Pat maps one int_nvvm_sust_* intrinsic onto the SUST_* record of the
// same geometry, element type and suffix, forwarding the operands unchanged.
// They are written as stand-alone Pat records instead of being placed in the
// instruction definitions: the original author reports that TableGen
// complained of type errors when that was tried (cause not recorded).

// ---- .clamp ----

// 1d
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 1d array
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2d
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2d array
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 3d
def : Pat<
  (int_nvvm_sust_b_3d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// ---- .trap ----

// 1d
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i16_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_i64_trap
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 1d array
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b,
Int16Regs:$a), 6468 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6469 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6470 6471 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 6472 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6473 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6474 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6475 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6476 6477 6478 6479 def : Pat<(int_nvvm_sust_b_2d_i8_trap 6480 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6481 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6482 Int16Regs:$r)>; 6483 6484 def : Pat<(int_nvvm_sust_b_2d_i16_trap 6485 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6486 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6487 Int16Regs:$r)>; 6488 6489 def : Pat<(int_nvvm_sust_b_2d_i32_trap 6490 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6491 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6492 Int32Regs:$r)>; 6493 6494 def : Pat<(int_nvvm_sust_b_2d_i64_trap 6495 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6496 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6497 Int64Regs:$r)>; 6498 6499 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 6500 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6501 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6502 Int16Regs:$r, Int16Regs:$g)>; 6503 6504 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 6505 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6506 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6507 Int16Regs:$r, Int16Regs:$g)>; 6508 6509 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 6510 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6511 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6512 Int32Regs:$r, Int32Regs:$g)>; 6513 6514 def : 
Pat<(int_nvvm_sust_b_2d_v2i64_trap 6515 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6516 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6517 Int64Regs:$r, Int64Regs:$g)>; 6518 6519 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 6520 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6521 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6522 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6523 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6524 6525 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 6526 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6527 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6528 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6529 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6530 6531 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 6532 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6533 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6534 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6535 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6536 6537 6538 6539 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 6540 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6541 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, 6542 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6543 Int16Regs:$r)>; 6544 6545 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 6546 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6547 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, 6548 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6549 Int16Regs:$r)>; 6550 6551 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 6552 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6553 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, 6554 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6555 Int32Regs:$r)>; 6556 6557 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 6558 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6559 (SUST_B_2D_ARRAY_B64_TRAP 
Int64Regs:$s, 6560 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6561 Int64Regs:$r)>; 6562 6563 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 6564 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6565 Int16Regs:$r, Int16Regs:$g), 6566 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, 6567 Int32Regs:$x, Int32Regs:$y, 6568 Int16Regs:$r, Int16Regs:$g)>; 6569 6570 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 6571 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6572 Int16Regs:$r, Int16Regs:$g), 6573 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 6574 Int32Regs:$x, Int32Regs:$y, 6575 Int16Regs:$r, Int16Regs:$g)>; 6576 6577 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 6578 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6579 Int32Regs:$g), 6580 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, 6581 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6582 6583 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 6584 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6585 Int64Regs:$g), 6586 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, 6587 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6588 6589 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 6590 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6591 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6592 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, 6593 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6594 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6595 6596 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 6597 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6598 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6599 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, 6600 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6601 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6602 6603 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 6604 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6605 
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6606 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, 6607 Int32Regs:$x, Int32Regs:$y, 6608 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6609 6610 6611 6612 def : Pat<(int_nvvm_sust_b_3d_i8_trap 6613 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6614 Int16Regs:$r), 6615 (SUST_B_3D_B8_TRAP Int64Regs:$s, 6616 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6617 Int16Regs:$r)>; 6618 6619 def : Pat<(int_nvvm_sust_b_3d_i16_trap 6620 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6621 Int16Regs:$r), 6622 (SUST_B_3D_B16_TRAP Int64Regs:$s, 6623 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6624 Int16Regs:$r)>; 6625 6626 def : Pat<(int_nvvm_sust_b_3d_i32_trap 6627 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6628 Int32Regs:$r), 6629 (SUST_B_3D_B32_TRAP Int64Regs:$s, 6630 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6631 Int32Regs:$r)>; 6632 6633 def : Pat<(int_nvvm_sust_b_3d_i64_trap 6634 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6635 Int64Regs:$r), 6636 (SUST_B_3D_B64_TRAP Int64Regs:$s, 6637 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6638 Int64Regs:$r)>; 6639 6640 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 6641 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6642 Int16Regs:$r, Int16Regs:$g), 6643 (SUST_B_3D_V2B8_TRAP Int64Regs:$s, 6644 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6645 Int16Regs:$r, Int16Regs:$g)>; 6646 6647 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 6648 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6649 Int16Regs:$r, Int16Regs:$g), 6650 (SUST_B_3D_V2B16_TRAP Int64Regs:$s, 6651 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6652 Int16Regs:$r, Int16Regs:$g)>; 6653 6654 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 6655 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6656 Int32Regs:$r, Int32Regs:$g), 6657 (SUST_B_3D_V2B32_TRAP Int64Regs:$s, 6658 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6659 Int32Regs:$r, Int32Regs:$g)>; 6660 6661 def : 
Pat<(int_nvvm_sust_b_3d_v2i64_trap 6662 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6663 Int64Regs:$r, Int64Regs:$g), 6664 (SUST_B_3D_V2B64_TRAP Int64Regs:$s, 6665 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6666 Int64Regs:$r, Int64Regs:$g)>; 6667 6668 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 6669 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6670 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6671 (SUST_B_3D_V4B8_TRAP Int64Regs:$s, 6672 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6673 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6674 6675 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 6676 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6677 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6678 (SUST_B_3D_V4B16_TRAP Int64Regs:$s, 6679 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6680 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6681 6682 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 6683 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6684 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6685 (SUST_B_3D_V4B32_TRAP Int64Regs:$s, 6686 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6687 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6688 6689 6690 // .zero variant 6691 def : Pat<(int_nvvm_sust_b_1d_i8_zero 6692 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6693 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6694 6695 def : Pat<(int_nvvm_sust_b_1d_i16_zero 6696 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6697 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6698 6699 def : Pat<(int_nvvm_sust_b_1d_i32_zero 6700 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6701 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6702 6703 def : Pat<(int_nvvm_sust_b_1d_i64_zero 6704 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6705 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6706 6707 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 6708 Int64Regs:$s, Int32Regs:$x, 
Int16Regs:$r, Int16Regs:$g), 6709 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, 6710 Int16Regs:$r, Int16Regs:$g)>; 6711 6712 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 6713 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6714 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, 6715 Int16Regs:$r, Int16Regs:$g)>; 6716 6717 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 6718 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6719 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, 6720 Int32Regs:$r, Int32Regs:$g)>; 6721 6722 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 6723 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6724 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, 6725 Int64Regs:$r, Int64Regs:$g)>; 6726 6727 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 6728 Int64Regs:$s, Int32Regs:$x, 6729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6730 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, 6731 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6732 6733 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 6734 Int64Regs:$s, Int32Regs:$x, 6735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6736 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, 6737 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6738 6739 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 6740 Int64Regs:$s, Int32Regs:$x, 6741 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6742 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, 6743 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6744 6745 6746 6747 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 6748 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6749 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6750 Int16Regs:$r)>; 6751 6752 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 6753 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6754 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6755 Int16Regs:$r)>; 6756 6757 def : 
Pat<(int_nvvm_sust_b_1d_array_i32_zero 6758 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6759 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6760 Int32Regs:$r)>; 6761 6762 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 6763 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6764 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6765 Int64Regs:$r)>; 6766 6767 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 6768 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6769 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6770 Int16Regs:$r, Int16Regs:$g)>; 6771 6772 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 6773 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6774 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6775 Int16Regs:$r, Int16Regs:$g)>; 6776 6777 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 6778 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6779 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6780 Int32Regs:$r, Int32Regs:$g)>; 6781 6782 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 6783 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6784 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6785 Int64Regs:$r, Int64Regs:$g)>; 6786 6787 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 6788 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6789 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6790 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6791 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6792 6793 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 6794 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6795 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6796 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6797 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6798 6799 def : 
Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 6800 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6801 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6802 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6803 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6804 6805 6806 6807 def : Pat<(int_nvvm_sust_b_2d_i8_zero 6808 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6809 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6810 Int16Regs:$r)>; 6811 6812 def : Pat<(int_nvvm_sust_b_2d_i16_zero 6813 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6814 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6815 Int16Regs:$r)>; 6816 6817 def : Pat<(int_nvvm_sust_b_2d_i32_zero 6818 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6819 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6820 Int32Regs:$r)>; 6821 6822 def : Pat<(int_nvvm_sust_b_2d_i64_zero 6823 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6824 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6825 Int64Regs:$r)>; 6826 6827 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 6828 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6829 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6830 Int16Regs:$r, Int16Regs:$g)>; 6831 6832 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 6833 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6834 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6835 Int16Regs:$r, Int16Regs:$g)>; 6836 6837 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 6838 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6839 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6840 Int32Regs:$r, Int32Regs:$g)>; 6841 6842 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 6843 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6844 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6845 Int64Regs:$r, 
Int64Regs:$g)>; 6846 6847 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 6848 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6849 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6850 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6851 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6852 6853 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 6854 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6856 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6857 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6858 6859 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 6860 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6861 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6862 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6863 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6864 6865 6866 6867 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 6868 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6869 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, 6870 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6871 Int16Regs:$r)>; 6872 6873 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 6874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6875 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, 6876 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6877 Int16Regs:$r)>; 6878 6879 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 6880 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6881 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, 6882 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6883 Int32Regs:$r)>; 6884 6885 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 6886 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6887 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, 6888 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6889 Int64Regs:$r)>; 6890 6891 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 6892 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6893 
Int16Regs:$r, Int16Regs:$g), 6894 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, 6895 Int32Regs:$x, Int32Regs:$y, 6896 Int16Regs:$r, Int16Regs:$g)>; 6897 6898 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 6899 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6900 Int16Regs:$r, Int16Regs:$g), 6901 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, 6902 Int32Regs:$x, Int32Regs:$y, 6903 Int16Regs:$r, Int16Regs:$g)>; 6904 6905 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 6906 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6907 Int32Regs:$g), 6908 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, 6909 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6910 6911 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 6912 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6913 Int64Regs:$g), 6914 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, 6915 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6916 6917 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 6918 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6919 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6920 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, 6921 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6922 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6923 6924 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 6925 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6926 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6927 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, 6928 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6929 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6930 6931 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 6932 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6933 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6934 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, 6935 Int32Regs:$x, Int32Regs:$y, 6936 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 


// sust.b, .zero variant, 3d forms: index operands $x, $y, $z.
// Each pattern forwards its operands to the like-named SUST_B_3D_*_ZERO
// instruction unchanged and in the same order. The i8 and i16 forms both
// take their data in Int16Regs.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


//-----------------------------------
// sust.p, .trap variant
//   int_nvvm_sust_p_*_trap -> SUST_P_*_TRAP
// Only 8-, 16- and 32-bit element forms are listed in this group.
//-----------------------------------

// 1d forms: index operand $x.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_P_1D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 1d_array forms: index operands $l, $x.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d forms: index operands $x, $y.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_P_2D_B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d_array forms: index operands $l, $x, $y.
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def :
Pat<(int_nvvm_sust_p_3d_i8_trap 7229 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7230 Int16Regs:$r), 7231 (SUST_P_3D_B8_TRAP Int64Regs:$s, 7232 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7233 Int16Regs:$r)>; 7234 7235 def : Pat<(int_nvvm_sust_p_3d_i16_trap 7236 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7237 Int16Regs:$r), 7238 (SUST_P_3D_B16_TRAP Int64Regs:$s, 7239 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7240 Int16Regs:$r)>; 7241 7242 def : Pat<(int_nvvm_sust_p_3d_i32_trap 7243 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7244 Int32Regs:$r), 7245 (SUST_P_3D_B32_TRAP Int64Regs:$s, 7246 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7247 Int32Regs:$r)>; 7248 7249 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 7250 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7251 Int16Regs:$r, Int16Regs:$g), 7252 (SUST_P_3D_V2B8_TRAP Int64Regs:$s, 7253 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7254 Int16Regs:$r, Int16Regs:$g)>; 7255 7256 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 7257 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7258 Int16Regs:$r, Int16Regs:$g), 7259 (SUST_P_3D_V2B16_TRAP Int64Regs:$s, 7260 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7261 Int16Regs:$r, Int16Regs:$g)>; 7262 7263 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 7264 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7265 Int32Regs:$r, Int32Regs:$g), 7266 (SUST_P_3D_V2B32_TRAP Int64Regs:$s, 7267 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7268 Int32Regs:$r, Int32Regs:$g)>; 7269 7270 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 7271 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7272 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7273 (SUST_P_3D_V4B8_TRAP Int64Regs:$s, 7274 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7275 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7276 7277 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 7278 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7279 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7280 
(SUST_P_3D_V4B16_TRAP Int64Regs:$s, 7281 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7282 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7283 7284 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 7285 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7286 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7287 (SUST_P_3D_V4B32_TRAP Int64Regs:$s, 7288 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7289 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7290 7291 //----------------------------------- 7292 // Read Special Registers 7293 //----------------------------------- 7294 7295 class PTX_READ_SREG_R64<string regname, Intrinsic intop> 7296 : NVPTXInst<(outs Int64Regs:$d), (ins), 7297 !strconcat("mov.u64 \t$d, %", regname, ";"), 7298 [(set Int64Regs:$d, (intop))]>; 7299 7300 class PTX_READ_SREG_R32<string regname, Intrinsic intop> 7301 : NVPTXInst<(outs Int32Regs:$d), (ins), 7302 !strconcat("mov.u32 \t$d, %", regname, ";"), 7303 [(set Int32Regs:$d, (intop))]>; 7304 7305 // TODO Add read vector-version of special registers 7306 7307 def INT_PTX_SREG_TID_X : 7308 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>; 7309 def INT_PTX_SREG_TID_Y : 7310 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>; 7311 def INT_PTX_SREG_TID_Z : 7312 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>; 7313 def INT_PTX_SREG_TID_W : 7314 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>; 7315 7316 def INT_PTX_SREG_NTID_X : 7317 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>; 7318 def INT_PTX_SREG_NTID_Y : 7319 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>; 7320 def INT_PTX_SREG_NTID_Z : 7321 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>; 7322 def INT_PTX_SREG_NTID_W : 7323 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>; 7324 7325 def INT_PTX_SREG_LANEID : 7326 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 7327 def INT_PTX_SREG_WARPID : 7328 PTX_READ_SREG_R32<"warpid", 
int_nvvm_read_ptx_sreg_warpid>; 7329 def INT_PTX_SREG_NWARPID : 7330 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 7331 7332 def INT_PTX_SREG_CTAID_X : 7333 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>; 7334 def INT_PTX_SREG_CTAID_Y : 7335 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>; 7336 def INT_PTX_SREG_CTAID_Z : 7337 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>; 7338 def INT_PTX_SREG_CTAID_W : 7339 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>; 7340 7341 def INT_PTX_SREG_NCTAID_X : 7342 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>; 7343 def INT_PTX_SREG_NCTAID_Y : 7344 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>; 7345 def INT_PTX_SREG_NCTAID_Z : 7346 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>; 7347 def INT_PTX_SREG_NCTAID_W : 7348 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>; 7349 7350 def INT_PTX_SREG_SMID : 7351 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 7352 def INT_PTX_SREG_NSMID : 7353 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 7354 def INT_PTX_SREG_GRIDID : 7355 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 7356 7357 def INT_PTX_SREG_LANEMASK_EQ : 7358 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 7359 def INT_PTX_SREG_LANEMASK_LE : 7360 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 7361 def INT_PTX_SREG_LANEMASK_LT : 7362 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 7363 def INT_PTX_SREG_LANEMASK_GE : 7364 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 7365 def INT_PTX_SREG_LANEMASK_GT : 7366 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 7367 7368 def INT_PTX_SREG_CLOCK : 7369 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 7370 def INT_PTX_SREG_CLOCK64 : 7371 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 7372 7373 def 
INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 7374 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 7375 def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 7376 def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 7377 7378 // TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 7379 // handle the constant. 7380 def INT_PTX_SREG_WARPSIZE : 7381 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 7382 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 7383 7384 // 7385 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7386 // 7387 7388 class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>; 7389 7390 class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout, 7391 string Space, string Type, NVPTXRegClass regclass, 7392 DAGOperand SrcOp, bit WithStride> 7393 : EmptyNVPTXInst, 7394 Requires<[!if(!eq(Geometry, "m16n16k16"), 7395 hasPTX60, 7396 hasPTX61), 7397 hasSM70]> { 7398 // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic 7399 // for this function. 7400 PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_" 7401 # Geometry # "_load_" 7402 # !subst("c", "c_" # Type, Abc) 7403 # "_" # Layout 7404 # !subst(".", "_", Space) 7405 # !if(WithStride,"_stride", "") 7406 # "_Intr"); 7407 dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3); 7408 dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7); 7409 dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47)); 7410 7411 dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins)); 7412 dag Ins = !con((ins SrcOp:$src), StrideArg); 7413 7414 // Build a dag pattern that matches the intrinsic call. 
7415 // We want a dag that looks like this: 7416 // (set <output args>, (intrinsic <input arguments>)) where input and 7417 // output arguments are named patterns that would match corresponding 7418 // input/output arguments of the instruction. 7419 // 7420 // First we construct (set <output arguments>) from instruction's outs dag by 7421 // replacing dag operator 'outs' with 'set'. 7422 dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp)); 7423 // Similarly, construct (intrinsic <input arguments>) sub-dag from 7424 // instruction's input arguments, only now we also need to replace operands 7425 // with patterns that would match them and the operator 'ins' with the 7426 // intrinsic. 7427 dag PatArgs = !foreach(tmp, Ins, 7428 !subst(imem, ADDRvar, 7429 !subst(MEMri64, ADDRri64, 7430 !subst(MEMri, ADDRri, 7431 !subst(ins, IntrMatcher, tmp))))); 7432 // Finally, consatenate both parts together. !con() requires both dags to have 7433 // the same operator, so we wrap PatArgs in a (set ...) dag. 7434 let Pattern = [!con(PatOuts, (set PatArgs))]; 7435 let OutOperandList = Outs; 7436 let InOperandList = Ins; 7437 let AsmString = "wmma.load." 7438 # Abc 7439 # ".sync" 7440 # "." # Layout 7441 # "." # Geometry 7442 # Space 7443 # "." # Type # " \t" 7444 # !if(!eq(Abc#Type, "cf16"), 7445 "{{$r0, $r1, $r2, $r3}}", 7446 "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}") 7447 # ", [$src]" 7448 # !if(WithStride, ", $ldm", "") 7449 # ";"; 7450 } 7451 7452 class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout, 7453 string Space, string Type, bit WithStride> 7454 : PatFrag <(ops),(ops)> { 7455 // Intrinsic that matches this instruction. 
7456 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma" 7457 # "_" # Geometry # "_load_" 7458 # Abc # "_" # Type # "_" # Layout 7459 # !if(WithStride,"_stride", "")); 7460 code match_generic = [{ 7461 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC); 7462 }]; 7463 code match_shared = [{ 7464 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED); 7465 }]; 7466 code match_global = [{ 7467 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); 7468 }]; 7469 7470 let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); 7471 let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))]; 7472 let PredicateCode = !if(!eq(Space, ".shared"), match_shared, 7473 !if(!eq(Space, ".global"), match_global, match_generic)); 7474 } 7475 7476 multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout, 7477 string Space, string Type, NVPTXRegClass regclass, 7478 bit WithStride> { 7479 def _avar: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass, 7480 imem, WithStride>; 7481 def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass, 7482 Int32Regs, WithStride>; 7483 def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass, 7484 Int64Regs, WithStride>; 7485 def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass, 7486 MEMri, WithStride>; 7487 def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass, 7488 MEMri64, WithStride>; 7489 } 7490 7491 multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout, 7492 string Space, string Type, NVPTXRegClass regclass, 7493 bit WithStride> { 7494 // Define a PatFrag that matches appropriate intrinsic that loads from the 7495 // given address space. 
7496 def _Intr: WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type, 7497 WithStride>; 7498 defm NAME: WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass, 7499 WithStride>; 7500 } 7501 7502 multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout, 7503 string Space, string Type, NVPTXRegClass regclass> { 7504 defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>; 7505 defm NAME: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>; 7506 } 7507 7508 multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout, 7509 string Type, NVPTXRegClass regclass> { 7510 defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global", 7511 Type, regclass>; 7512 defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared", 7513 Type, regclass>; 7514 defm NAME: WMMA_LOAD_GALST<Geometry, Abc, Layout, "", 7515 Type, regclass>; 7516 } 7517 7518 multiclass WMMA_LOAD_GAT<string Geometry, string Abc, 7519 string Type, NVPTXRegClass regclass> { 7520 defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>; 7521 defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>; 7522 } 7523 7524 multiclass WMMA_LOAD_G<string Geometry> { 7525 defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>; 7526 defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>; 7527 defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>; 7528 defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>; 7529 } 7530 7531 defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">; 7532 defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">; 7533 defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">; 7534 7535 // 7536 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7537 // 7538 class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space, 7539 string Type, NVPTXRegClass regclass, 7540 bit WithStride, DAGOperand DstOp> 7541 : EmptyNVPTXInst, 7542 Requires<[!if(!eq(Geometry, 
"m16n16k16"), 7543 hasPTX60, 7544 hasPTX61), 7545 hasSM70]> { 7546 PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA" 7547 # "_" # Geometry # "_store_d" 7548 # "_" # Type 7549 # "_" # Layout 7550 # !subst(".", "_", Space) 7551 # !if(WithStride,"_stride", "") 7552 # "_Intr"); 7553 dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1, 7554 regclass:$r2, regclass:$r3); 7555 dag InsR47 = (ins regclass:$r4, regclass:$r5, 7556 regclass:$r6, regclass:$r7); 7557 dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47)); 7558 dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins)); 7559 dag Ins = !con(InsR, StrideArg); 7560 7561 // Construct the pattern to match corresponding intrinsic call. See the 7562 // details in the comments in WMMA_LOAD_ALSTOS. 7563 dag PatArgs = !foreach(tmp, Ins, 7564 !subst(imem, ADDRvar, 7565 !subst(MEMri64, ADDRri64, 7566 !subst(MEMri, ADDRri, 7567 !subst(ins, IntrMatcher, tmp))))); 7568 let Pattern = [PatArgs]; 7569 let OutOperandList = (outs); 7570 let InOperandList = Ins; 7571 let AsmString = "wmma.store.d.sync." 7572 # Layout 7573 # "." # Geometry 7574 # Space 7575 # "." # Type 7576 # " \t[$src]," 7577 # !if(!eq(Type,"f16"), 7578 "{{$r0, $r1, $r2, $r3}}", 7579 "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}") 7580 # !if(WithStride, ", $ldm", "") 7581 # ";"; 7582 7583 } 7584 7585 class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space, 7586 string Type, bit WithStride> 7587 : PatFrag <(ops),(ops)> { 7588 // Intrinsic that matches this instruction. 
7589 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_" 7590 # Geometry 7591 # "_store_d" 7592 # "_" # Type 7593 # "_" # Layout 7594 # !if(WithStride, "_stride", "")); 7595 code match_generic = [{ 7596 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC); 7597 }]; 7598 code match_shared = [{ 7599 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED); 7600 }]; 7601 code match_global = [{ 7602 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); 7603 }]; 7604 7605 dag Args = !if(!eq(Type,"f16"), 7606 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3), 7607 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3, 7608 node:$r4, node:$r5, node:$r6, node:$r7)); 7609 dag StrideArg = !if(WithStride, (ops node:$ldm), (ops)); 7610 let Operands = !con(Args, StrideArg); 7611 let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))]; 7612 let PredicateCode = !if(!eq(Space, ".shared"), match_shared, 7613 !if(!eq(Space, ".global"), match_global, match_generic)); 7614 } 7615 7616 multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space, 7617 string Type, NVPTXRegClass regclass, 7618 bit WithStride> { 7619 def _avar: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass, 7620 WithStride, imem>; 7621 def _areg: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass, 7622 WithStride, Int32Regs>; 7623 def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass, 7624 WithStride, Int64Regs>; 7625 def _ari: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass, 7626 WithStride, MEMri>; 7627 def _ari64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass, 7628 WithStride, MEMri64>; 7629 } 7630 7631 multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space, 7632 string Type, NVPTXRegClass regclass, 7633 bit WithStride> { 7634 // Define a PatFrag that matches appropriate intrinsic that loads from the 7635 // given address space. 
7636 def _Intr: WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type, 7637 WithStride>; 7638 defm NAME: WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass, 7639 WithStride>; 7640 } 7641 7642 multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space, 7643 string Type, NVPTXRegClass regclass > { 7644 defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>; 7645 defm NAME: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>; 7646 } 7647 7648 multiclass WMMA_STORE_D_GLT<string Geometry, string Layout, 7649 string Type, NVPTXRegClass regclass> { 7650 defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>; 7651 defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>; 7652 defm NAME: WMMA_STORE_D_GLST<Geometry, Layout, "", Type, regclass>; 7653 } 7654 7655 multiclass WMMA_STORE_D_GT<string Geometry, string Type, 7656 NVPTXRegClass regclass> { 7657 defm _row: WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>; 7658 defm _col: WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>; 7659 } 7660 7661 multiclass WMMA_STORE_D_G<string Geometry> { 7662 defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>; 7663 defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>; 7664 } 7665 7666 defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">; 7667 defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">; 7668 defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">; 7669 7670 // WMMA.MMA 7671 class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout, 7672 string DType, NVPTXRegClass d_reg, 7673 string CType, NVPTXRegClass c_reg, 7674 NVPTXRegClass ab_reg, 7675 string Satfinite = ""> 7676 : EmptyNVPTXInst, 7677 Requires<[!if(!eq(Geometry, "m16n16k16"), 7678 hasPTX60, 7679 hasPTX61), 7680 hasSM70]> { 7681 Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_" 7682 # Geometry 7683 # "_mma" 7684 # "_" # ALayout 7685 # "_" # BLayout 7686 # "_" # DType 7687 # "_" # CType 7688 # 
!subst(".", "_", Satfinite)); 7689 dag Outs = !if(!eq(DType,"f16"), 7690 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3), 7691 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3, 7692 d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7)); 7693 dag InsExtraCArgs = !if(!eq(CType,"f16"), 7694 (ins), 7695 (ins c_reg:$c4, c_reg:$c5, c_reg:$c6, c_reg:$c7)); 7696 dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3, 7697 ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7, 7698 ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3, 7699 ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7, 7700 c_reg:$c0, c_reg:$c1, c_reg:$c2, c_reg:$c3), 7701 InsExtraCArgs); 7702 7703 // Construct the pattern to match corresponding intrinsic call. See the 7704 // details in the comments in WMMA_LOAD_ALSTOS. 7705 dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp)); 7706 dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp)); 7707 let Pattern = [!con(PatOuts, (set PatArgs))]; 7708 let OutOperandList = Outs; 7709 let InOperandList = Ins; 7710 let AsmString = "wmma.mma.sync." 7711 # ALayout 7712 # "." # BLayout 7713 # "." # Geometry 7714 # "." # DType 7715 # "." 
# CType 7716 # Satfinite # "\n\t\t" 7717 # !if(!eq(DType,"f16"), 7718 "{{$d0, $d1, $d2, $d3}}, \n\t\t", 7719 "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t") 7720 # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t" 7721 # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t" 7722 # !if(!eq(CType,"f16"), 7723 "{{$c0, $c1, $c2, $c3}};", 7724 "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};"); 7725 } 7726 7727 multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout, 7728 string DType, NVPTXRegClass d_reg, 7729 string CType, NVPTXRegClass c_reg> { 7730 def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout, 7731 DType, d_reg, CType, c_reg, 7732 Float16x2Regs, ".satfinite">; 7733 def NAME: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout, 7734 DType, d_reg, CType, c_reg, 7735 Float16x2Regs>; 7736 } 7737 7738 multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout, 7739 string DType, NVPTXRegClass d_reg> { 7740 defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg, 7741 "f16", Float16x2Regs>; 7742 defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg, 7743 "f32", Float32Regs>; 7744 } 7745 7746 multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> { 7747 defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>; 7748 defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>; 7749 } 7750 7751 multiclass WMMA_MMA_GA<string Geometry, string ALayout> { 7752 defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">; 7753 defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">; 7754 } 7755 7756 multiclass WMMA_MMA_G<string Geometry> { 7757 defm _col: WMMA_MMA_GA<Geometry, "col">; 7758 defm _row: WMMA_MMA_GA<Geometry, "row">; 7759 } 7760 7761 defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">; 7762 defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">; 7763 defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">; 7764