//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the intrinsics for the r600 and amdgcn target prefixes.
//
//===----------------------------------------------------------------------===//

// Intrinsic with no operands that returns an i32 and is marked IntrNoMem.
class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;

// Same signature as AMDGPUReadPreloadRegisterIntrinsic, additionally bound to
// the GCC builtin called `name`.
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<name>;

let TargetPrefix = "r600" in {

// Instantiates the _x, _y and _z variants of a preload-register read.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

// Instantiates the _x, _y and _z variants, each bound to the GCC builtin
// named `prefix` followed by the matching "_x" / "_y" / "_z" suffix.
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_global_size">;
defm int_r600_read_ngroups
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_ngroups">;
defm int_r600_read_tgid
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier
  : GCCBuiltin<"__builtin_r600_group_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;

// Address space 7 is PARAM_I_ADDRESS, which is used for kernel arguments.
def int_r600_implicitarg_ptr
  : GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;

def int_r600_rat_store_typed
  : Intrinsic<[],
              [llvm_v4i32_ty,  // 1st parameter: Data
               llvm_v4i32_ty,  // 2nd parameter: Index
               llvm_i32_ty],   // 3rd parameter: Constant RAT ID
              []>,
    GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_r600_recipsqrt_clamped
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_r600_cube
  : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;

} // End TargetPrefix = "r600"

let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id
  : AMDGPUReadPreloadRegisterIntrinsic_xyz_named<
      "__builtin_amdgcn_workgroup_id">;

// The pointer-returning intrinsics below all yield an i8 pointer in address
// space 2, take no operands and are IntrNoMem.
def int_amdgcn_dispatch_ptr
  : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_queue_ptr
  : GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_kernarg_segment_ptr
  : GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_implicitarg_ptr
  : GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

def int_amdgcn_groupstaticsize
  : GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
    Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;

def int_amdgcn_dispatch_id
  : GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
    Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;

def int_amdgcn_implicit_buffer_ptr
  : GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
    Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// For both sendmsg variants the first parameter is the s_sendmsg immediate
// (i16) and the second one is copied to m0.
def int_amdgcn_s_sendmsg
  : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt
  : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;

def int_amdgcn_s_barrier
  : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_wave_barrier
  : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
    Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_s_waitcnt
  : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
    Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_div_scale
  : Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
              [LLVMMatchType<0>,  // 1st parameter: Numerator
               LLVMMatchType<0>,  // 2nd parameter: Denominator
               llvm_i1_ty],       // 3rd parameter: Constant to select between
                                  // first and second (0 = first, 1 = second).
              [IntrNoMem]>;

def int_amdgcn_div_fmas
  : Intrinsic<[llvm_anyfloat_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
               llvm_i1_ty],
              [IntrNoMem]>;

def int_amdgcn_div_fixup
  : Intrinsic<[llvm_anyfloat_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;

def int_amdgcn_trig_preop
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_sin
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_cos
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_log_clamp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_fmul_legacy
  : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
    Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;

def int_amdgcn_rcp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_rcp_legacy
  : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
    Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

def int_amdgcn_rsq
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_rsq_legacy
  : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
    Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

def int_amdgcn_rsq_clamp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_ldexp
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_frexp_mant
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_frexp_exp
  : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;

// v_fract is buggy on SI/CI: it mishandles infinities, may return 1.0 and
// always uses rtz, so it is not suitable for implementing the OpenCL fract
// function. It should be ok on VI.
def int_amdgcn_fract
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

def int_amdgcn_cvt_pkrtz
  : Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;

def int_amdgcn_class
  : Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_fmed3
  : GCCBuiltin<"__builtin_amdgcn_fmed3">,
    Intrinsic<[llvm_anyfloat_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;

def int_amdgcn_cubeid
  : GCCBuiltin<"__builtin_amdgcn_cubeid">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_float_ty, llvm_float_ty],
              [IntrNoMem]>;

def int_amdgcn_cubema
  : GCCBuiltin<"__builtin_amdgcn_cubema">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_float_ty, llvm_float_ty],
              [IntrNoMem]>;

def int_amdgcn_cubesc
  : GCCBuiltin<"__builtin_amdgcn_cubesc">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_float_ty, llvm_float_ty],
              [IntrNoMem]>;

def int_amdgcn_cubetc
  : GCCBuiltin<"__builtin_amdgcn_cubetc">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_float_ty, llvm_float_ty],
              [IntrNoMem]>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh
  : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;


// Shared signature of the atomic inc/dec intrinsics.
// Fields should mirror atomicrmw.
class AMDGPUAtomicIncIntrin
  : Intrinsic<[llvm_anyint_ty],
              [llvm_anyptr_ty,
               LLVMMatchType<0>,
               llvm_i32_ty,  // ordering
               llvm_i32_ty,  // scope
               llvm_i1_ty],  // isVolatile
              [IntrArgMemOnly, NoCapture<0>]>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;

// Shared signature of the image load style intrinsics (IntrReadMem).
class AMDGPUImageLoad
  : Intrinsic<[llvm_anyfloat_ty],  // vdata(VGPR)
              [llvm_anyint_ty,     // vaddr(VGPR)
               llvm_anyint_ty,     // rsrc(SGPR)
               llvm_i32_ty,        // dmask(imm)
               llvm_i1_ty,         // glc(imm)
               llvm_i1_ty,         // slc(imm)
               llvm_i1_ty,         // lwe(imm)
               llvm_i1_ty],        // da(imm)
              [IntrReadMem]>;

def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
def int_amdgcn_image_getresinfo : AMDGPUImageLoad;

// Shared signature of the image store intrinsics (no result, no properties).
class AMDGPUImageStore
  : Intrinsic<[],
              [llvm_anyfloat_ty,  // vdata(VGPR)
               llvm_anyint_ty,    // vaddr(VGPR)
               llvm_anyint_ty,    // rsrc(SGPR)
               llvm_i32_ty,       // dmask(imm)
               llvm_i1_ty,        // glc(imm)
               llvm_i1_ty,        // slc(imm)
               llvm_i1_ty,        // lwe(imm)
               llvm_i1_ty],       // da(imm)
              []>;

def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;

// Shared signature of the image sample / gather4 / getlod intrinsics
// (IntrReadMem).
class AMDGPUImageSample
  : Intrinsic<[llvm_anyfloat_ty],  // vdata(VGPR)
              [llvm_anyfloat_ty,   // vaddr(VGPR)
               llvm_anyint_ty,     // rsrc(SGPR)
               llvm_v4i32_ty,      // sampler(SGPR)
               llvm_i32_ty,        // dmask(imm)
               llvm_i1_ty,         // unorm(imm)
               llvm_i1_ty,         // glc(imm)
               llvm_i1_ty,         // slc(imm)
               llvm_i1_ty,         // lwe(imm)
               llvm_i1_ty],        // da(imm)
              [IntrReadMem]>;

// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
def int_amdgcn_image_sample_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_d : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_l : AMDGPUImageSample;
def int_amdgcn_image_sample_b : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;

// Sample with comparison
def int_amdgcn_image_sample_c : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;

// Sample with offsets
def int_amdgcn_image_sample_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;

// Sample with comparison and offsets
def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;

// Basic gather4
def int_amdgcn_image_gather4 : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz : AMDGPUImageSample;

// Gather4 with comparison
def int_amdgcn_image_gather4_c : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;

// Gather4 with offsets
def int_amdgcn_image_gather4_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;

// Gather4 with comparison and offsets
def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;

def int_amdgcn_image_getlod : AMDGPUImageSample;

// Shared signature of the image atomic intrinsics: i32 result, no properties.
class AMDGPUImageAtomic
  : Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty,     // vdata(VGPR)
               llvm_anyint_ty,  // vaddr(VGPR)
               llvm_v8i32_ty,   // rsrc(SGPR)
               llvm_i1_ty,      // r128(imm)
               llvm_i1_ty,      // da(imm)
               llvm_i1_ty],     // slc(imm)
              []>;

def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
// cmpswap carries separate src and cmp operands, so it does not use
// AMDGPUImageAtomic.
def int_amdgcn_image_atomic_cmpswap
  : Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty,     // src(VGPR)
               llvm_i32_ty,     // cmp(VGPR)
               llvm_anyint_ty,  // vaddr(VGPR)
               llvm_v8i32_ty,   // rsrc(SGPR)
               llvm_i1_ty,      // r128(imm)
               llvm_i1_ty,      // da(imm)
               llvm_i1_ty],     // slc(imm)
              []>;

// Shared signature of the buffer load intrinsics (IntrReadMem).
class AMDGPUBufferLoad
  : Intrinsic<[llvm_anyfloat_ty],
              [llvm_v4i32_ty,  // rsrc(SGPR)
               llvm_i32_ty,    // vindex(VGPR)
               llvm_i32_ty,    // offset(SGPR/VGPR/imm)
               llvm_i1_ty,     // glc(imm)
               llvm_i1_ty],    // slc(imm)
              [IntrReadMem]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;

// Shared signature of the buffer store intrinsics (IntrWriteMem).
class AMDGPUBufferStore
  : Intrinsic<[],
              [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select
                                  // f32, v2f32, v4f32
               llvm_v4i32_ty,     // rsrc(SGPR)
               llvm_i32_ty,       // vindex(VGPR)
               llvm_i32_ty,       // offset(SGPR/VGPR/imm)
               llvm_i1_ty,        // glc(imm)
               llvm_i1_ty],       // slc(imm)
              [IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

// Shared signature of the buffer atomic intrinsics: i32 result, no properties.
class AMDGPUBufferAtomic
  : Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty,    // vdata(VGPR)
               llvm_v4i32_ty,  // rsrc(SGPR)
               llvm_i32_ty,    // vindex(VGPR)
               llvm_i32_ty,    // offset(SGPR/VGPR/imm)
               llvm_i1_ty],    // slc(imm)
              []>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
// cmpswap carries separate src and cmp operands, so it does not use
// AMDGPUBufferAtomic.
def int_amdgcn_buffer_atomic_cmpswap
  : Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty,    // src(VGPR)
               llvm_i32_ty,    // cmp(VGPR)
               llvm_v4i32_ty,  // rsrc(SGPR)
               llvm_i32_ty,    // vindex(VGPR)
               llvm_i32_ty,    // offset(SGPR/VGPR/imm)
               llvm_i1_ty],    // slc(imm)
              []>;

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp
  : Intrinsic<[],
              [llvm_i32_ty,       // tgt
               llvm_i32_ty,       // en
               llvm_any_ty,       // src0 (f32 or i32)
               LLVMMatchType<0>,  // src1
               LLVMMatchType<0>,  // src2
               LLVMMatchType<0>,  // src3
               llvm_i1_ty,        // done
               llvm_i1_ty],       // vm
              []>;

// exp with compr bit set.
def int_amdgcn_exp_compr
  : Intrinsic<[],
              [llvm_i32_ty,        // tgt
               llvm_i32_ty,        // en
               llvm_anyvector_ty,  // src0 (v2f16 or v2i16)
               LLVMMatchType<0>,   // src1
               llvm_i1_ty,         // done
               llvm_i1_ty],        // vm
              []>;

def int_amdgcn_buffer_wbinvl1_sc
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
    Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
    Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
    Intrinsic<[], [], []>;

def int_amdgcn_s_memtime
  : GCCBuiltin<"__builtin_amdgcn_s_memtime">,
    Intrinsic<[llvm_i64_ty], [], []>;

def int_amdgcn_s_sleep
  : GCCBuiltin<"__builtin_amdgcn_s_sleep">,
    Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_incperflevel
  : GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
    Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_decperflevel
  : GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
    Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_getreg
  : GCCBuiltin<"__builtin_amdgcn_s_getreg">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov
  : GCCBuiltin<"__builtin_amdgcn_interp_mov">,
    Intrinsic<[llvm_float_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant, so it
// behaves like IntrNoMem.
def int_amdgcn_interp_p1
  : GCCBuiltin<"__builtin_amdgcn_interp_p1">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
// IntrNoMem for the same reason as int_amdgcn_interp_p1: it reads from lds,
// but the memory values are constant.
def int_amdgcn_interp_p2
  : GCCBuiltin<"__builtin_amdgcn_interp_p2">,
    Intrinsic<[llvm_float_ty],
              [llvm_float_ty, llvm_float_ty,
               llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live
  : Intrinsic<[llvm_i1_ty], [], [IntrNoMem]>;

def int_amdgcn_mbcnt_lo
  : GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi
  : GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle
  : GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_ubfe
  : Intrinsic<[llvm_anyint_ty],
              [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_sbfe
  : Intrinsic<[llvm_anyint_ty],
              [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_lerp
  : GCCBuiltin<"__builtin_amdgcn_lerp">,
    Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_sad_u8
  : GCCBuiltin<"__builtin_amdgcn_sad_u8">,
    Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_msad_u8
  : GCCBuiltin<"__builtin_amdgcn_msad_u8">,
    Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_sad_hi_u8
  : GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
    Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_sad_u16
  : GCCBuiltin<"__builtin_amdgcn_sad_u16">,
    Intrinsic<[llvm_i32_ty],
              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_qsad_pk_u16_u8
  : GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
    Intrinsic<[llvm_i64_ty],
              [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
              [IntrNoMem]>;

def int_amdgcn_mqsad_pk_u16_u8
  : GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
    Intrinsic<[llvm_i64_ty],
              [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
              [IntrNoMem]>;

def int_amdgcn_mqsad_u32_u8
  : GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
    Intrinsic<[llvm_v4i32_ty],
              [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
              [IntrNoMem]>;

def int_amdgcn_cvt_pk_u8_f32
  : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
    Intrinsic<[llvm_i32_ty],
              [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem]>;

def int_amdgcn_icmp
  : Intrinsic<[llvm_i64_ty],
              [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_fcmp
  : Intrinsic<[llvm_i64_ty],
              [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_readfirstlane
  : GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_readlane
  : GCCBuiltin<"__builtin_amdgcn_readlane">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
    Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol
  : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
    Intrinsic<[], [], []>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp
  : Intrinsic<[llvm_anyint_ty],
              [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
               llvm_i1_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_s_dcache_wb
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
    Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_wb_vol
  : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
    Intrinsic<[], [], []>;

def int_amdgcn_s_memrealtime
  : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
    Intrinsic<[llvm_i64_ty], [], []>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute
  : GCCBuiltin<"__builtin_amdgcn_ds_permute">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute
  : GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
              [IntrNoMem, IntrConvergent]>;


//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//
def int_amdgcn_if
  : Intrinsic<[llvm_i1_ty, llvm_i64_ty], [llvm_i1_ty], [IntrConvergent]>;

def int_amdgcn_else
  : Intrinsic<[llvm_i1_ty, llvm_i64_ty], [llvm_i64_ty], [IntrConvergent]>;

def int_amdgcn_break
  : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_if_break
  : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_else_break
  : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
              [IntrNoMem, IntrConvergent]>;

def int_amdgcn_loop
  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrConvergent]>;

def int_amdgcn_end_cf
  : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable
  : Intrinsic<[], [], [IntrConvergent]>;

// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast
  : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
} // End TargetPrefix = "amdgcn"