//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the constant and
// compares it with ==, so the "0" leaves accept -0.0 as well as +0.0.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;



//-----------------------------------
// Synchronization Functions
//-----------------------------------

// Both the CUDA and the NVVM barrier intrinsics print the same instruction.
def INT_CUDA_SYNCTHREADS
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_cuda_syncthreads)]>;
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// The reducing barriers take a 32-bit value, turn it into a predicate with
// setp.ne inside a private {{ }} scope, and feed that predicate to bar.red.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                !strconcat(".reg .pred \t%p1; \n\t",
                !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
                !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
                           "}}")))),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// The and/or forms produce a predicate (%p2), which selp.u32 converts back
// into a 0/1 value in $dst.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                !strconcat(".reg .pred \t%p1; \n\t",
                !strconcat(".reg .pred \t%p2; \n\t",
                !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
                !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
                !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
                           "}}")))))),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                !strconcat("{{ \n\t",
                !strconcat(".reg .pred \t%p1; \n\t",
                !strconcat(".reg .pred \t%p2; \n\t",
                !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
                !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
                !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
                           "}}")))))),
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;


//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// An operand-less instruction that prints StrOp and is selected for IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders.
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64: fmin(1.0, fmax(0.0, a)) in all four operand orders.
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// One-source intrinsic instruction: $dst = IntOP($src0).
// We need a full string for OpcStr here because we need to deal with cases
// like INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with cases
// like INT_PTX_NATIVE_POWR_F.
// Two-source intrinsic instruction: $dst = IntOP($src0, $src1). OpcStr is the
// complete asm string.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1,
                     s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;

//
// MISC
//

// Note: the 64-bit clz/popc forms still write a 32-bit destination register.
def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_clz_i>;
def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;",
    Int32Regs, Int64Regs, int_nvvm_clz_ll>;

def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_popc_i>;
def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;",
    Int32Regs, Int64Regs, int_nvvm_popc_ll>;

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Multiplication
//

def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Brev
//

def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_brev32>;
def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;",
    Int64Regs, Int64Regs, int_nvvm_brev64>;

//
// Sad
//
292 293 def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", 294 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; 295 def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", 296 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; 297 298 // 299 // Floor Ceil 300 // 301 302 def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), 303 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 304 def : Pat<(int_nvvm_floor_f Float32Regs:$a), 305 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; 306 def : Pat<(int_nvvm_floor_d Float64Regs:$a), 307 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; 308 309 def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), 310 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 311 def : Pat<(int_nvvm_ceil_f Float32Regs:$a), 312 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; 313 def : Pat<(int_nvvm_ceil_d Float64Regs:$a), 314 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; 315 316 // 317 // Abs 318 // 319 320 def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs, 321 int_nvvm_abs_i>; 322 def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs, 323 int_nvvm_abs_ll>; 324 325 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, 326 Float32Regs, int_nvvm_fabs_ftz_f>; 327 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, 328 Float32Regs, int_nvvm_fabs_f>; 329 330 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, 331 Float64Regs, int_nvvm_fabs_d>; 332 333 // 334 // Round 335 // 336 337 def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), 338 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 339 def : Pat<(int_nvvm_round_f Float32Regs:$a), 340 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; 341 def : Pat<(int_nvvm_round_d Float64Regs:$a), 342 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; 343 344 // 345 // Trunc 346 // 347 348 def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), 349 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 350 def : Pat<(int_nvvm_trunc_f Float32Regs:$a), 351 
(CVT_f32_f32 Float32Regs:$a, CvtRZI)>; 352 def : Pat<(int_nvvm_trunc_d Float64Regs:$a), 353 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; 354 355 // 356 // Saturate 357 // 358 359 def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), 360 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; 361 def : Pat<(int_nvvm_saturate_f Float32Regs:$a), 362 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 363 def : Pat<(int_nvvm_saturate_d Float64Regs:$a), 364 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 365 366 // 367 // Exp2 Log2 368 // 369 370 def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", 371 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; 372 def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", 373 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; 374 def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", 375 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; 376 377 def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", 378 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; 379 def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", 380 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; 381 def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", 382 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; 383 384 // 385 // Sin Cos 386 // 387 388 def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", 389 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; 390 def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", 391 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; 392 393 def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", 394 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; 395 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", 396 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; 397 398 // 399 // Fma 400 // 401 402 def INT_NVVM_FMA_RN_FTZ_F 403 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", 
Float32Regs, 404 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; 405 def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", 406 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>; 407 def INT_NVVM_FMA_RZ_FTZ_F 408 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 409 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; 410 def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", 411 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; 412 def INT_NVVM_FMA_RM_FTZ_F 413 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 414 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; 415 def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", 416 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; 417 def INT_NVVM_FMA_RP_FTZ_F 418 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 419 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; 420 def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", 421 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; 422 423 def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", 424 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; 425 def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", 426 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; 427 def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", 428 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; 429 def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", 430 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; 431 432 // 433 // Rcp 434 // 435 436 def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", 437 Float32Regs, Float32Regs, 
int_nvvm_rcp_rn_ftz_f>; 438 def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", 439 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>; 440 def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", 441 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>; 442 def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;", 443 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>; 444 def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", 445 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>; 446 def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", 447 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>; 448 def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", 449 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>; 450 def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", 451 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>; 452 453 def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, 454 Float64Regs, int_nvvm_rcp_rn_d>; 455 def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, 456 Float64Regs, int_nvvm_rcp_rz_d>; 457 def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, 458 Float64Regs, int_nvvm_rcp_rm_d>; 459 def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, 460 Float64Regs, int_nvvm_rcp_rp_d>; 461 462 def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", 463 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; 464 465 // 466 // Sqrt 467 // 468 469 def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", 470 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>; 471 def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, 472 Float32Regs, int_nvvm_sqrt_rn_f>; 473 def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", 474 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>; 475 def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, 476 Float32Regs, 
int_nvvm_sqrt_rz_f>; 477 def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", 478 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>; 479 def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, 480 Float32Regs, int_nvvm_sqrt_rm_f>; 481 def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", 482 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>; 483 def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, 484 Float32Regs, int_nvvm_sqrt_rp_f>; 485 def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", 486 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>; 487 def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", 488 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>; 489 490 def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, 491 Float64Regs, int_nvvm_sqrt_rn_d>; 492 def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, 493 Float64Regs, int_nvvm_sqrt_rz_d>; 494 def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, 495 Float64Regs, int_nvvm_sqrt_rm_d>; 496 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, 497 Float64Regs, int_nvvm_sqrt_rp_d>; 498 499 // nvvm_sqrt intrinsic 500 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 501 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; 502 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 503 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; 504 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 505 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; 506 def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 507 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; 508 509 // 510 // Rsqrt 511 // 512 513 def INT_NVVM_RSQRT_APPROX_FTZ_F 514 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, 515 int_nvvm_rsqrt_approx_ftz_f>; 516 def INT_NVVM_RSQRT_APPROX_F : 
F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;", 517 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>; 518 def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", 519 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; 520 521 // 522 // Add 523 // 524 525 def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;", 526 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>; 527 def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;", 528 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>; 529 def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;", 530 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>; 531 def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;", 532 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>; 533 def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;", 534 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>; 535 def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", 536 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; 537 def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", 538 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; 539 def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", 540 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; 541 542 def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", 543 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; 544 def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", 545 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; 546 def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", 547 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; 548 def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", 549 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; 550 551 // 552 // Convert 553 // 554 555 def : 
Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), 556 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; 557 def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), 558 (CVT_f32_f64 Float64Regs:$a, CvtRN)>; 559 def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), 560 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; 561 def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), 562 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; 563 def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), 564 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; 565 def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), 566 (CVT_f32_f64 Float64Regs:$a, CvtRM)>; 567 def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), 568 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; 569 def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), 570 (CVT_f32_f64 Float64Regs:$a, CvtRP)>; 571 572 def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), 573 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; 574 def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), 575 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; 576 def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), 577 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; 578 def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), 579 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; 580 581 def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), 582 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; 583 def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), 584 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; 585 def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), 586 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; 587 def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), 588 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; 589 590 def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), 591 (CVT_f64_s32 Int32Regs:$a, CvtRN)>; 592 def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), 593 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; 594 def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), 595 (CVT_f64_s32 Int32Regs:$a, CvtRM)>; 596 def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), 597 (CVT_f64_s32 Int32Regs:$a, CvtRP)>; 598 599 def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), 600 (CVT_f64_u32 Int32Regs:$a, CvtRN)>; 601 def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), 602 (CVT_f64_u32 Int32Regs:$a, 
CvtRZ)>; 603 def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), 604 (CVT_f64_u32 Int32Regs:$a, CvtRM)>; 605 def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), 606 (CVT_f64_u32 Int32Regs:$a, CvtRP)>; 607 608 def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), 609 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 610 def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), 611 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; 612 def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), 613 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 614 def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), 615 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; 616 def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), 617 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 618 def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), 619 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; 620 def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), 621 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 622 def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), 623 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; 624 625 def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), 626 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 627 def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), 628 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; 629 def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), 630 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 631 def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), 632 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; 633 def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), 634 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 635 def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), 636 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; 637 def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), 638 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 639 def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), 640 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; 641 642 def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), 643 (CVT_f32_s32 Int32Regs:$a, CvtRN)>; 644 def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), 645 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; 646 def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), 647 (CVT_f32_s32 Int32Regs:$a, CvtRM)>; 648 def : 
Pat<(int_nvvm_i2f_rp Int32Regs:$a), 649 (CVT_f32_s32 Int32Regs:$a, CvtRP)>; 650 651 def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), 652 (CVT_f32_u32 Int32Regs:$a, CvtRN)>; 653 def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), 654 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; 655 def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), 656 (CVT_f32_u32 Int32Regs:$a, CvtRM)>; 657 def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), 658 (CVT_f32_u32 Int32Regs:$a, CvtRP)>; 659 660 def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", 661 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; 662 663 def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t", 664 !strconcat(".reg .b32 %temp; \n\t", 665 !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t", 666 "}}"))), 667 Int32Regs, Float64Regs, int_nvvm_d2i_lo>; 668 def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t", 669 !strconcat(".reg .b32 %temp; \n\t", 670 !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t", 671 "}}"))), 672 Int32Regs, Float64Regs, int_nvvm_d2i_hi>; 673 674 def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), 675 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 676 def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), 677 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; 678 def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), 679 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 680 def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), 681 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; 682 def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), 683 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 684 def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), 685 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; 686 def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), 687 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 688 def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), 689 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; 690 691 def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), 692 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 693 def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), 694 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; 695 def : Pat<(int_nvvm_f2ull_rz_ftz 
Float32Regs:$a), 696 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 697 def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), 698 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; 699 def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), 700 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 701 def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), 702 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; 703 def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), 704 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 705 def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), 706 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; 707 708 def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), 709 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; 710 def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), 711 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; 712 def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), 713 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; 714 def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), 715 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; 716 717 def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), 718 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; 719 def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), 720 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; 721 def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), 722 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; 723 def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), 724 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; 725 726 def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), 727 (CVT_f32_s64 Int64Regs:$a, CvtRN)>; 728 def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), 729 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; 730 def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), 731 (CVT_f32_s64 Int64Regs:$a, CvtRM)>; 732 def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), 733 (CVT_f32_s64 Int64Regs:$a, CvtRP)>; 734 735 def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), 736 (CVT_f32_u64 Int64Regs:$a, CvtRN)>; 737 def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), 738 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; 739 def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), 740 (CVT_f32_u64 Int64Regs:$a, CvtRM)>; 741 def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), 742 (CVT_f32_u64 Int64Regs:$a, 
CvtRP)>;

// int_nvvm_ll2d_*: selected as CVT_f64_s64.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
    (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
    (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
    (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
    (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ull2d_*: selected as CVT_f64_u64.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
    (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
    (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
    (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
    (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// FIXME: Ideally, we could use these patterns instead of the scope-creating
// patterns, but ptxas does not like these since .s16 is not compatible with
// .f16.  The solution is to use .bXX for all integer register types, but we
// are not there yet.
//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
//def : Pat<(int_nvvm_h2f Int16Regs:$a),
//          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;

// f32 -> f16 intrinsics: convert into a scratch .b16 register declared inside
// the braces, then mov.b16 the result into $dst.
def INT_NVVM_F2H_RN_FTZ
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
                        "mov.b16 \t$dst, %temp;\n",
                        "}}"),
             Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
def INT_NVVM_F2H_RN
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "cvt.rn.f16.f32 \t%temp, $src0;\n\t",
                        "mov.b16 \t$dst, %temp;\n",
                        "}}"),
             Int16Regs, Float32Regs, int_nvvm_f2h_rn>;

// f16 -> f32 intrinsic: mov.b16 the source into the scratch register first,
// then cvt.f32.f16 from it.
def INT_NVVM_H2F
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b16 %temp;\n\t",
                        "mov.b16 \t%temp, $src0;\n\t",
                        "cvt.f32.f16 \t$dst, %temp;\n\t",
                        "}}"),
             Float32Regs, Int16Regs, int_nvvm_h2f>;

// f16_to_f32 / f32_to_f16 nodes are selected straight to the CVT
// instructions.  The CvtRN_FTZ form is guarded by doF32FTZ; the CvtRN form
// carries no predicate.
def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
    (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
    (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
    (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//

// Each bitcast intrinsic is one mov.b32 / mov.b64 between register classes.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, Float32Regs,
             int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, Int32Regs,
             int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, Int64Regs,
             int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs,
             int_nvvm_bitcast_d2ll>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers whose predicate is
// ChkMemSDNodeAddressSpace(N, <address space>), one class per address space
// constant (GLOBAL / SHARED / GENERIC).
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
    }]>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
    }]>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, [{
      return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
    }]>;

// Two-operand atomic for one pointer register class.  The mnemonic is
// "atom" # SpaceStr # OpcStr # TypeStr.  `reg` takes the value operand from a
// register, `imm` from an IMMType operand matched with the IMM node.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<[Pred]>;
  def imm
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_imp once with Int32Regs and once with Int64Regs as
// the pointer register class.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, Predicate Pred> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted text declares a scratch register `temp`, writes neg.s<TypeStr>
// of $b into it, and issues the atom instruction with `temp` as the operand.
// Only a register form is defined; IMMType is accepted but not used here.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                !strconcat("{{ \n\t",
                           ".reg \t.s", TypeStr, " temp; \n\t",
                           "neg.s", TypeStr, " \ttemp, $b; \n\t",
                           "atom", SpaceStr, OpcStr, ".u", TypeStr,
                           " \t$dst, [$addr], temp; \n\t",
                           "}}"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<[Pred]>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, Predicate Pred> {
  defm p32 : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr,
                                OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr,
                                OpcStr, IntOp, IMMType, Pred>;
}

// has 3 operands
// `reg` takes both value operands from registers; imm1 / imm2 / imm3 take
// $b, $c, or both as IMMType operands matched with `imm`.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, Predicate Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                   (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<[Pred]>;
  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                   (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<[Pred]>;
  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                   (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<[Pred]>;
  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                   (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<[Pred]>;
}
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      Predicate Pred> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}

// atom_add

def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s:
ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_f32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;

// 32-bit add.  The *_GEN_32_USE_G form matches the generic-space fragment but
// prints ".global"; it is guarded by useAtomRedG32forGen32.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// 64-bit add, same layout with the 64-bit fragments and predicates.
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// f32 add: immediates are f32imm operands matched with fpimm; all three
// address-space forms share the hasAtomAddF32 predicate.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;

// atom_sub

def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_64 node:$a, node:$b)>;

// The sub fragments are instantiated through F_ATOMIC_2_NEG with ".add" as
// the opcode string; TypeStr is the bare width ("32" / "64").
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SUB_S_64 :
F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
               atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;

// atom_swap

def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_64 node:$a, node:$b)>;

// Swap is printed with the ".exch" opcode string on .b32 / .b64.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_max

def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_32 node:$a, node:$b)>;

// atomic_load_max_32 is printed as ".max" on ".s32", atomic_load_umax_32 as
// ".max" on ".u32".
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_min

def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;

// atomic_load_min_32 is printed as ".min" on ".s32", atomic_load_umin_32 as
// ".min" on ".u32".
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm
INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_inc  atom_dec

// These fragments wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_and

def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_or

def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;

// atom_xor

def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_cas

// Compare-and-swap fragments take three operands ($a, $b, $c).
def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
defm
INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;


//-----------------------------------
// Read Special Registers
//-----------------------------------

// Instruction with no inputs and one output: prints OpStr and matches a call
// to the operand-less intrinsic IntOp.
class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp>
  : NVPTXInst<(outs regclassOut:$dst), (ins), OpStr,
              [(set regclassOut:$dst, (IntOp))]>;

def INT_PTX_SREG_TID_X
  : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
           int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
  : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
  : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_tid_z>;

def INT_PTX_SREG_NTID_X
  : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
           int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
  : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
  : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_ntid_z>;

def INT_PTX_SREG_CTAID_X :
F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
       int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
  : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
  : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_ctaid_z>;

def INT_PTX_SREG_NCTAID_X
  : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
  : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
  : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
           int_nvvm_read_ptx_sreg_nctaid_z>;

def INT_PTX_SREG_WARPSIZE
  : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
           int_nvvm_read_ptx_sreg_warpsize>;


//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Matches int_nvvm_ldu_global_i only when the node's memory VT is i8.
def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

// Scalar
// @TODO: Revisit this, Changed imemAny to imem
// One "ldu.global." # TyStr instruction per addressing form: 32/64-bit
// register (areg, areg64), wrapped global address (avar), and the
// ADDRri / ADDRri64 forms (ari, ari64).  All are guarded by hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp Int32Regs:$src))]>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp Int64Regs:$src))]>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imem:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result,
                   (IntOp (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp ADDRri:$src))]>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp ADDRri64:$src))]>,
      Requires<[hasLDU]>;
}

// Same five forms as LDU_G, but IntOp is a PatFrag rather than an Intrinsic.
multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass,
                          PatFrag IntOp> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp Int32Regs:$src))]>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp Int64Regs:$src))]>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imem:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result,
                   (IntOp (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp ADDRri:$src))]>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr),
                [(set regclass:$result, (IntOp ADDRri64:$src))]>,
      Requires<[hasLDU]>;
}

// The i8 form goes through the ldu_i8 fragment and produces an Int16Regs
// result; the rest use the intrinsics directly.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldu_i8>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldu_global_f>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldu_global_f>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldu_global_p>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldu_global_p>;

// vector

// Elementized vector ldu
// These instructions have an empty selection pattern list; each element of
// the vector gets its own output register.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr), []>;
}

multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins Int32Regs:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins Int64Regs:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins MEMri:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins MEMri64:$src),
                !strconcat("ldu.global.", TyStr), []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                      regclass:$dst4),
                (ins imemAny:$src),
                !strconcat("ldu.global.", TyStr), []>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Matches int_nvvm_ldg_global_i only when the node's memory VT is i8.
def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

// One "ld.global.nc." # TyStr instruction per addressing form: 32/64-bit
// register (areg, areg64), wrapped global address (avar), and the
// ADDRri / ADDRri64 forms (ari, ari64).  All are guarded by hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp Int32Regs:$src))]>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp Int64Regs:$src))]>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imem:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result,
                   (IntOp (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp ADDRri:$src))]>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp ADDRri64:$src))]>,
      Requires<[hasLDG]>;
}

// Same five forms as LDG_G, but IntOp is a PatFrag rather than an Intrinsic.
multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass,
                          PatFrag IntOp> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp Int32Regs:$src))]>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp Int64Regs:$src))]>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imem:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result,
                   (IntOp (Wrapper tglobaladdr:$src)))]>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp ADDRri:$src))]>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                !strconcat("ld.global.nc.", TyStr),
                [(set regclass:$result, (IntOp ADDRri64:$src))]>,
      Requires<[hasLDG]>;
}

// The i8 form goes through the ldg_i8 fragment and produces an Int16Regs
// result; the rest use the intrinsics directly.
defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
defm
INT_PTX_LDG_GLOBAL_f64 1463 : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>; 1464 defm INT_PTX_LDG_GLOBAL_p32 1465 : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>; 1466 defm INT_PTX_LDG_GLOBAL_p64 1467 : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>; 1468 1469 // vector 1470 1471 // Elementized vector ldg 1472 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { 1473 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1474 (ins Int32Regs:$src), 1475 !strconcat("ld.global.nc.", TyStr), []>; 1476 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1477 (ins Int64Regs:$src), 1478 !strconcat("ld.global.nc.", TyStr), []>; 1479 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1480 (ins MEMri:$src), 1481 !strconcat("ld.global.nc.", TyStr), []>; 1482 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1483 (ins MEMri64:$src), 1484 !strconcat("ld.global.nc.", TyStr), []>; 1485 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1486 (ins imemAny:$src), 1487 !strconcat("ld.global.nc.", TyStr), []>; 1488 } 1489 1490 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 1491 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1492 regclass:$dst4), (ins Int32Regs:$src), 1493 !strconcat("ld.global.nc.", TyStr), []>; 1494 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1495 regclass:$dst4), (ins Int64Regs:$src), 1496 !strconcat("ld.global.nc.", TyStr), []>; 1497 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1498 regclass:$dst4), (ins MEMri:$src), 1499 !strconcat("ld.global.nc.", TyStr), []>; 1500 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1501 regclass:$dst4), (ins MEMri64:$src), 1502 !strconcat("ld.global.nc.", TyStr), []>; 1503 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1504 regclass:$dst4), (ins 
imemAny:$src), 1505 !strconcat("ld.global.nc.", TyStr), []>; 1506 } 1507 1508 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. 1509 defm INT_PTX_LDG_G_v2i8_ELE 1510 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1511 defm INT_PTX_LDG_G_v2i16_ELE 1512 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1513 defm INT_PTX_LDG_G_v2i32_ELE 1514 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; 1515 defm INT_PTX_LDG_G_v2f32_ELE 1516 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; 1517 defm INT_PTX_LDG_G_v2i64_ELE 1518 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; 1519 defm INT_PTX_LDG_G_v2f64_ELE 1520 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; 1521 defm INT_PTX_LDG_G_v4i8_ELE 1522 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1523 defm INT_PTX_LDG_G_v4i16_ELE 1524 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1525 defm INT_PTX_LDG_G_v4i32_ELE 1526 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; 1527 defm INT_PTX_LDG_G_v4f32_ELE 1528 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; 1529 1530 1531 multiclass NG_TO_G<string Str, Intrinsic Intrin> { 1532 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1533 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), 1534 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, 1535 Requires<[hasGenericLdSt]>; 1536 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1537 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), 1538 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, 1539 Requires<[hasGenericLdSt]>; 1540 1541 // @TODO: Are these actually needed? I believe global addresses will be copied 1542 // to register values anyway. 
1543 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), 1544 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), 1545 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, 1546 Requires<[hasGenericLdSt]>; 1547 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), 1548 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), 1549 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, 1550 Requires<[hasGenericLdSt]>;*/ 1551 1552 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1553 "mov.u32 \t$result, $src;", 1554 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1555 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1556 "mov.u64 \t$result, $src;", 1557 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; 1558 1559 // @TODO: Are these actually needed? I believe global addresses will be copied 1560 // to register values anyway. 1561 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src), 1562 "mov.u32 \t$result, $src;", 1563 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>; 1564 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), 1565 "mov.u64 \t$result, $src;", 1566 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/ 1567 } 1568 1569 multiclass G_TO_NG<string Str, Intrinsic Intrin> { 1570 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1571 !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")), 1572 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, 1573 Requires<[hasGenericLdSt]>; 1574 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1575 !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")), 1576 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, 1577 Requires<[hasGenericLdSt]>; 1578 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1579 "mov.u32 \t$result, $src;", 1580 [(set 
Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1581 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1582 "mov.u64 \t$result, $src;", 1583 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; 1584 } 1585 1586 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; 1587 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; 1588 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; 1589 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; 1590 1591 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; 1592 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; 1593 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; 1594 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; 1595 1596 1597 // nvvm.ptr.gen.to.param 1598 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), 1599 (ins Int32Regs:$src), 1600 "mov.u32 \t$result, $src;", 1601 [(set Int32Regs:$result, 1602 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; 1603 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), 1604 (ins Int64Regs:$src), 1605 "mov.u64 \t$result, $src;", 1606 [(set Int64Regs:$result, 1607 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; 1608 1609 1610 // nvvm.move intrinsicc 1611 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), 1612 "mov.b16 \t$r, $s;", 1613 [(set Int16Regs:$r, 1614 (int_nvvm_move_i16 Int16Regs:$s))]>; 1615 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), 1616 "mov.b32 \t$r, $s;", 1617 [(set Int32Regs:$r, 1618 (int_nvvm_move_i32 Int32Regs:$s))]>; 1619 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), 1620 "mov.b64 \t$r, $s;", 1621 [(set Int64Regs:$r, 1622 (int_nvvm_move_i64 Int64Regs:$s))]>; 1623 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), 1624 "mov.f32 \t$r, $s;", 1625 [(set Float32Regs:$r, 1626 (int_nvvm_move_float Float32Regs:$s))]>; 
// Remaining nvvm.move variants: f64, and pointer moves at 32 and 64 bits.
def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
                                 "mov.f64 \t$r, $s;",
                                 [(set Float64Regs:$r,
                                       (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                                "mov.u32 \t$r, $s;",
                                [(set Int32Regs:$r,
                                      (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                                "mov.u64 \t$r, $s;",
                                [(set Int64Regs:$r,
                                      (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.  There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol, for both 64-bit and 32-bit results.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                                            (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                                            (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;


//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each record emits only a PTX comment line as its asm string.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
                                          "// llvm.nvvm.compiler.warn()",
                                          [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
                                          "// llvm.nvvm.compiler.warn()",
                                          [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
                                           "// llvm.nvvm.compiler.error()",
                                           [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
                                           "// llvm.nvvm.compiler.error()",
                                           [(int_nvvm_compiler_error Int64Regs:$a)]>;



//===-- Old PTX Back-end Intrinsics ---------------------------------------===//

// These intrinsics are handled to retain compatibility with the old backend.
// PTX Special Purpose Register Accessor Intrinsics

// Reads special register %<regname> into a 64-bit register via "mov.u64",
// selected from the zero-operand intrinsic intop.
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
              [(set Int64Regs:$d, (intop))]>;

// Reads special register %<regname> into a 32-bit register via "mov.u32",
// selected from the zero-operand intrinsic intop.
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
              [(set Int32Regs:$d, (intop))]>;

// TODO Add read vector-version of special registers

def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
                                                   int_ptx_read_tid_x>;
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
                                                   int_ptx_read_tid_y>;
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
                                                   int_ptx_read_tid_z>;
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
                                                   int_ptx_read_tid_w>;

def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
                                                    int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
                                                    int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
                                                    int_ptx_read_ntid_z>;
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
                                                    int_ptx_read_ntid_w>;

def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
                                                    int_ptx_read_laneid>;
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
                                                    int_ptx_read_warpid>;
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
                                                     int_ptx_read_nwarpid>;

def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
                                                     int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
                                                     int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
                                                     int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
                                                     int_ptx_read_ctaid_w>;

def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
                                                      int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
                                                      int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
                                                      int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
                                                      int_ptx_read_nctaid_w>;

def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
                                                  int_ptx_read_smid>;
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
                                                   int_ptx_read_nsmid>;
def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
                                                    int_ptx_read_gridid>;

def PTX_READ_LANEMASK_EQ
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;

def PTX_READ_CLOCK
  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;

def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;

// PTX Parallel Synchronization and Communication Intrinsics

// Barrier whose barrier number is the immediate operand $i.
def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
                             [(int_ptx_bar_sync imm:$i)]>;