Home | History | Annotate | Download | only in IR
      1 //===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the intrinsics for the AMDGPU targets (r600 and amdgcn).
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 class AMDGPUReadPreloadRegisterIntrinsic
     15   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;  // No operands; returns i32.
     16 
     17 class AMDGPUReadPreloadRegisterIntrinsicNamed<string name> // GCCBuiltin name
     18   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<name>;
     19 
     20 let TargetPrefix = "r600" in {
     21 
     22 multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {  // Defines _x, _y, _z.
     23   def _x : AMDGPUReadPreloadRegisterIntrinsic;
     24   def _y : AMDGPUReadPreloadRegisterIntrinsic;
     25   def _z : AMDGPUReadPreloadRegisterIntrinsic;
     26 }
     27 
     28 multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
     29   def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
     30   def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
     31   def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
     32 }  // Each def's builtin name is prefix followed by "_x", "_y" or "_z".
     33 
     34 defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
     35                                  <"__builtin_r600_read_global_size">;
     36 defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
     37                              <"__builtin_r600_read_ngroups">;
     38 defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
     39                           <"__builtin_r600_read_tgid">;
     40 
     41 defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
     42 defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
     43 
     44 def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
     45   Intrinsic<[], [], [IntrConvergent]>;
     46 
     47 // AS 7 is PARAM_I_ADDRESS, used for kernel arguments
     48 def int_r600_implicitarg_ptr :
     49   GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
     50   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;
     51 
     52 def int_r600_rat_store_typed :
     53   // 1st parameter: Data
     54   // 2nd parameter: Index
     55   // 3rd parameter: Constant RAT ID
     56   Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
     57   GCCBuiltin<"__builtin_r600_rat_store_typed">;
     58 
     59 def int_r600_recipsqrt_ieee :  Intrinsic<
     60   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
     61 >;
     62 
     63 def int_r600_recipsqrt_clamped : Intrinsic<
     64   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
     65 >;
     66 
     67 def int_r600_cube : Intrinsic<
     68   [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
     69 >;
     70 
     71 } // End TargetPrefix = "r600"
     72 
     73 let TargetPrefix = "amdgcn" in {
     74 
     75 //===----------------------------------------------------------------------===//
     76 // ABI Special Intrinsics
     77 //===----------------------------------------------------------------------===//
     78 
     79 defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
     80 defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
     81                                <"__builtin_amdgcn_workgroup_id">;
     82 
     83 def int_amdgcn_dispatch_ptr :
     84   GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
     85   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
     86 
     87 def int_amdgcn_queue_ptr :
     88   GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
     89   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
     90 
     91 def int_amdgcn_kernarg_segment_ptr :
     92   GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
     93   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
     94 
     95 def int_amdgcn_implicitarg_ptr :
     96   GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
     97   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
     98 
     99 def int_amdgcn_groupstaticsize :
    100   GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
    101   Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
    102 
    103 def int_amdgcn_dispatch_id :
    104   GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
    105   Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
    106 
    107 def int_amdgcn_implicit_buffer_ptr :
    108   GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
    109   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
    110 
    111 //===----------------------------------------------------------------------===//
    112 // Instruction Intrinsics
    113 //===----------------------------------------------------------------------===//
    114 
    115 // The first parameter is the s_sendmsg immediate (i16);
    116 // the second one is copied to m0.
    117 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
    118   Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
    119 def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
    120   Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
    121 
    122 def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
    123   Intrinsic<[], [], [IntrConvergent]>;
    124 
    125 def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
    126   Intrinsic<[], [], [IntrConvergent]>;
    127 
    128 def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
    129   Intrinsic<[], [llvm_i32_ty], []>;
    130 
    131 def int_amdgcn_div_scale : Intrinsic<
    132   // 1st parameter: Numerator
    133   // 2nd parameter: Denominator
    134   // 3rd parameter: Constant to select between first and
    135   //                second. (0 = first, 1 = second).
    136   [llvm_anyfloat_ty, llvm_i1_ty],
    137   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
    138   [IntrNoMem]
    139 >;
    140 
    141 def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
    142   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
    143   [IntrNoMem]
    144 >;
    145 
    146 def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
    147   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    148   [IntrNoMem]
    149 >;
    150 
    151 def int_amdgcn_trig_preop : Intrinsic<
    152   [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
    153 >;
    154 
    155 def int_amdgcn_sin : Intrinsic<
    156   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    157 >;
    158 
    159 def int_amdgcn_cos : Intrinsic<
    160   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    161 >;
    162 
    163 def int_amdgcn_log_clamp : Intrinsic<
    164   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    165 >;
    166 
    167 def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
    168   Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
    169 >;
    170 
    171 def int_amdgcn_rcp : Intrinsic<
    172   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    173 >;
    174 
    175 def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
    176   Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
    177 >;
    178 
    179 def int_amdgcn_rsq :  Intrinsic<
    180   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    181 >;
    182 
    183 def int_amdgcn_rsq_legacy :  GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
    184   Intrinsic<
    185   [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
    186 >;
    187 
    188 def int_amdgcn_rsq_clamp : Intrinsic<
    189   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
    190 
    191 def int_amdgcn_ldexp : Intrinsic<
    192   [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
    193 >;
    194 
    195 def int_amdgcn_frexp_mant : Intrinsic<
    196   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    197 >;
    198 
    199 def int_amdgcn_frexp_exp : Intrinsic<
    200   [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]
    201 >;
    202 
    203 // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
    204 // and always uses rtz, so is not suitable for implementing the OpenCL
    205 // fract function. It should be ok on VI.
    206 def int_amdgcn_fract : Intrinsic<
    207   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
    208 >;
    209 
    210 def int_amdgcn_cvt_pkrtz : Intrinsic<
    211   [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
    212 >;
    213 
    214 def int_amdgcn_class : Intrinsic<
    215   [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
    216 >;
    217 
    218 def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
    219   Intrinsic<[llvm_anyfloat_ty],
    220     [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]
    221 >;
    222 
    223 def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
    224   Intrinsic<[llvm_float_ty],
    225     [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
    226 >;
    227 
    228 def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
    229   Intrinsic<[llvm_float_ty],
    230   [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
    231 >;
    232 
    233 def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
    234   Intrinsic<[llvm_float_ty],
    235     [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
    236 >;
    237 
    238 def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
    239   Intrinsic<[llvm_float_ty],
    240     [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
    241 >;
    242 
    243 // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
    244 // should be used.
    245 def int_amdgcn_sffbh :
    246   Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
    247 
    248 
    249 // Fields should mirror atomicrmw
    250 class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
    251   [llvm_anyptr_ty, // pointer operand (argument 0, marked NoCapture below)
    252   LLVMMatchType<0>, // value operand, same type as the result
    253   llvm_i32_ty, // ordering
    254   llvm_i32_ty, // scope
    255   llvm_i1_ty], // isVolatile
    256   [IntrArgMemOnly, NoCapture<0>]
    257 >;
    258 
    259 def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
    260 def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
    261 
    262 class AMDGPUImageLoad : Intrinsic <
    263   [llvm_anyfloat_ty], // vdata(VGPR)
    264   [llvm_anyint_ty,    // vaddr(VGPR)
    265    llvm_anyint_ty,    // rsrc(SGPR)
    266    llvm_i32_ty,       // dmask(imm)
    267    llvm_i1_ty,        // glc(imm)
    268    llvm_i1_ty,        // slc(imm)
    269    llvm_i1_ty,        // lwe(imm)
    270    llvm_i1_ty],       // da(imm)
    271   [IntrReadMem]>;
    272 
    273 def int_amdgcn_image_load : AMDGPUImageLoad;
    274 def int_amdgcn_image_load_mip : AMDGPUImageLoad;
    275 def int_amdgcn_image_getresinfo : AMDGPUImageLoad;
    276 
    277 class AMDGPUImageStore : Intrinsic <
    278   [],
    279   [llvm_anyfloat_ty,  // vdata(VGPR)
    280    llvm_anyint_ty,    // vaddr(VGPR)
    281    llvm_anyint_ty,    // rsrc(SGPR)
    282    llvm_i32_ty,       // dmask(imm)
    283    llvm_i1_ty,        // glc(imm)
    284    llvm_i1_ty,        // slc(imm)
    285    llvm_i1_ty,        // lwe(imm)
    286    llvm_i1_ty],       // da(imm)
    287   []>;
    288 
    289 def int_amdgcn_image_store : AMDGPUImageStore;
    290 def int_amdgcn_image_store_mip : AMDGPUImageStore;
    291 
    292 class AMDGPUImageSample : Intrinsic <
    293     [llvm_anyfloat_ty], // vdata(VGPR)
    294     [llvm_anyfloat_ty,  // vaddr(VGPR)
    295      llvm_anyint_ty,    // rsrc(SGPR)
    296      llvm_v4i32_ty,     // sampler(SGPR)
    297      llvm_i32_ty,       // dmask(imm)
    298      llvm_i1_ty,        // unorm(imm)
    299      llvm_i1_ty,        // glc(imm)
    300      llvm_i1_ty,        // slc(imm)
    301      llvm_i1_ty,        // lwe(imm)
    302      llvm_i1_ty],       // da(imm)
    303      [IntrReadMem]>;
    304 
    305 // Basic sample
    306 def int_amdgcn_image_sample : AMDGPUImageSample;
    307 def int_amdgcn_image_sample_cl : AMDGPUImageSample;
    308 def int_amdgcn_image_sample_d : AMDGPUImageSample;
    309 def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
    310 def int_amdgcn_image_sample_l : AMDGPUImageSample;
    311 def int_amdgcn_image_sample_b : AMDGPUImageSample;
    312 def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
    313 def int_amdgcn_image_sample_lz : AMDGPUImageSample;
    314 def int_amdgcn_image_sample_cd : AMDGPUImageSample;
    315 def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;
    316 
    317 // Sample with comparison
    318 def int_amdgcn_image_sample_c : AMDGPUImageSample;
    319 def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
    320 def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
    321 def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
    322 def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
    323 def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
    324 def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
    325 def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
    326 def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
    327 def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;
    328 
    329 // Sample with offsets
    330 def int_amdgcn_image_sample_o : AMDGPUImageSample;
    331 def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
    332 def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
    333 def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
    334 def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
    335 def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
    336 def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
    337 def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
    338 def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
    339 def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;
    340 
    341 // Sample with comparison and offsets
    342 def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
    343 def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
    344 def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
    345 def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
    346 def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
    347 def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
    348 def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
    349 def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
    350 def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
    351 def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;
    352 
    353 // Basic gather4
    354 def int_amdgcn_image_gather4 : AMDGPUImageSample;
    355 def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
    356 def int_amdgcn_image_gather4_l : AMDGPUImageSample;
    357 def int_amdgcn_image_gather4_b : AMDGPUImageSample;
    358 def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
    359 def int_amdgcn_image_gather4_lz : AMDGPUImageSample;
    360 
    361 // Gather4 with comparison
    362 def int_amdgcn_image_gather4_c : AMDGPUImageSample;
    363 def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
    364 def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
    365 def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
    366 def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
    367 def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;
    368 
    369 // Gather4 with offsets
    370 def int_amdgcn_image_gather4_o : AMDGPUImageSample;
    371 def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
    372 def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
    373 def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
    374 def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
    375 def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;
    376 
    377 // Gather4 with comparison and offsets
    378 def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
    379 def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
    380 def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
    381 def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
    382 def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
    383 def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;
    384 
    385 def int_amdgcn_image_getlod : AMDGPUImageSample;
    386 
    387 class AMDGPUImageAtomic : Intrinsic <
    388   [llvm_i32_ty],
    389   [llvm_i32_ty,       // vdata(VGPR)
    390    llvm_anyint_ty,    // vaddr(VGPR)
    391    llvm_v8i32_ty,     // rsrc(SGPR)
    392    llvm_i1_ty,        // r128(imm)
    393    llvm_i1_ty,        // da(imm)
    394    llvm_i1_ty],       // slc(imm)
    395   []>;
    396 
    397 def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
    398 def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
    399 def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
    400 def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
    401 def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
    402 def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
    403 def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
    404 def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
    405 def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
    406 def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
    407 def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
    408 def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
    409 def int_amdgcn_image_atomic_cmpswap : Intrinsic <
    410   [llvm_i32_ty],
    411   [llvm_i32_ty,       // src(VGPR)
    412    llvm_i32_ty,       // cmp(VGPR)
    413    llvm_anyint_ty,    // vaddr(VGPR)
    414    llvm_v8i32_ty,     // rsrc(SGPR)
    415    llvm_i1_ty,        // r128(imm)
    416    llvm_i1_ty,        // da(imm)
    417    llvm_i1_ty],       // slc(imm)
    418   []>;
    419 
    420 class AMDGPUBufferLoad : Intrinsic <
    421   [llvm_anyfloat_ty],
    422   [llvm_v4i32_ty,     // rsrc(SGPR)
    423    llvm_i32_ty,       // vindex(VGPR)
    424    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    425    llvm_i1_ty,        // glc(imm)
    426    llvm_i1_ty],       // slc(imm)
    427   [IntrReadMem]>;
    428 def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
    429 def int_amdgcn_buffer_load : AMDGPUBufferLoad;
    430 
    431 class AMDGPUBufferStore : Intrinsic <
    432   [],
    433   [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
    434    llvm_v4i32_ty,     // rsrc(SGPR)
    435    llvm_i32_ty,       // vindex(VGPR)
    436    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    437    llvm_i1_ty,        // glc(imm)
    438    llvm_i1_ty],       // slc(imm)
    439   [IntrWriteMem]>;
    440 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
    441 def int_amdgcn_buffer_store : AMDGPUBufferStore;
    442 
    443 class AMDGPUBufferAtomic : Intrinsic <
    444   [llvm_i32_ty],
    445   [llvm_i32_ty,       // vdata(VGPR)
    446    llvm_v4i32_ty,     // rsrc(SGPR)
    447    llvm_i32_ty,       // vindex(VGPR)
    448    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    449    llvm_i1_ty],       // slc(imm)
    450   []>;
    451 def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
    452 def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
    453 def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
    454 def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
    455 def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
    456 def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
    457 def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
    458 def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
    459 def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
    460 def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
    461 def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
    462   [llvm_i32_ty],
    463   [llvm_i32_ty,       // src(VGPR)
    464    llvm_i32_ty,       // cmp(VGPR)
    465    llvm_v4i32_ty,     // rsrc(SGPR)
    466    llvm_i32_ty,       // vindex(VGPR)
    467    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    468    llvm_i1_ty],       // slc(imm)
    469   []>;
    470 
    471 // Uses that do not set the done bit should set IntrWriteMem on the
    472 // call site.
    473 def int_amdgcn_exp : Intrinsic <[], [
    474   llvm_i32_ty,       // tgt,
    475   llvm_i32_ty,       // en
    476   llvm_any_ty,       // src0 (f32 or i32)
    477   LLVMMatchType<0>,  // src1
    478   LLVMMatchType<0>,  // src2
    479   LLVMMatchType<0>,  // src3
    480   llvm_i1_ty,        // done
    481   llvm_i1_ty         // vm
    482   ],
    483   []
    484 >;
    485 
    486 // exp with compr bit set.
    487 def int_amdgcn_exp_compr : Intrinsic <[], [
    488   llvm_i32_ty,       // tgt,
    489   llvm_i32_ty,       // en
    490   llvm_anyvector_ty, // src0 (v2f16 or v2i16)
    491   LLVMMatchType<0>,  // src1
    492   llvm_i1_ty,        // done
    493   llvm_i1_ty],       // vm
    494   []
    495 >;
    496 
    497 def int_amdgcn_buffer_wbinvl1_sc :
    498   GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
    499   Intrinsic<[], [], []>;
    500 
    501 def int_amdgcn_buffer_wbinvl1 :
    502   GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
    503   Intrinsic<[], [], []>;
    504 
    505 def int_amdgcn_s_dcache_inv :
    506   GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
    507   Intrinsic<[], [], []>;
    508 
    509 def int_amdgcn_s_memtime :
    510   GCCBuiltin<"__builtin_amdgcn_s_memtime">,
    511   Intrinsic<[llvm_i64_ty], [], []>;
    512 
    513 def int_amdgcn_s_sleep :
    514   GCCBuiltin<"__builtin_amdgcn_s_sleep">,
    515   Intrinsic<[], [llvm_i32_ty], []> {
    516 }
    517 
    518 def int_amdgcn_s_incperflevel :
    519   GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
    520   Intrinsic<[], [llvm_i32_ty], []> {
    521 }
    522 
    523 def int_amdgcn_s_decperflevel :
    524   GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
    525   Intrinsic<[], [llvm_i32_ty], []> {
    526 }
    527 
    528 def int_amdgcn_s_getreg :
    529   GCCBuiltin<"__builtin_amdgcn_s_getreg">,
    530   Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
    531 
    532 // __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
    533 // param values: 0 = P10, 1 = P20, 2 = P0
    534 def int_amdgcn_interp_mov :
    535   GCCBuiltin<"__builtin_amdgcn_interp_mov">,
    536   Intrinsic<[llvm_float_ty],
    537             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    538             [IntrNoMem]>;
    539 
    540 // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
    541 def int_amdgcn_interp_p1 :
    542   GCCBuiltin<"__builtin_amdgcn_interp_p1">,
    543   Intrinsic<[llvm_float_ty],
    544             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    545             [IntrNoMem]>;  // This intrinsic reads from lds, but the memory
    546                            // values are constant, so it behaves like IntrNoMem.
    547 
    548 // __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
    549 def int_amdgcn_interp_p2 :
    550   GCCBuiltin<"__builtin_amdgcn_interp_p2">,
    551   Intrinsic<[llvm_float_ty],
    552             [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    553             [IntrNoMem]>;  // See int_amdgcn_interp_p1 for why this is
    554                            // IntrNoMem.
    555 
    556 // Pixel shaders only: whether the current pixel is live (i.e. not a helper
    557 // invocation for derivative computation).
    558 def int_amdgcn_ps_live : Intrinsic <
    559   [llvm_i1_ty],
    560   [],
    561   [IntrNoMem]>;
    562 
    563 def int_amdgcn_mbcnt_lo :
    564   GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
    565   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    566 
    567 def int_amdgcn_mbcnt_hi :
    568   GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
    569   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    570 
    571 // llvm.amdgcn.ds.swizzle src offset
    572 def int_amdgcn_ds_swizzle :
    573   GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
    574   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
    575 
    576 def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
    577   [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
    578 >;
    579 
    580 def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
    581   [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
    582 >;
    583 
    584 def int_amdgcn_lerp :
    585   GCCBuiltin<"__builtin_amdgcn_lerp">,
    586   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    587 
    588 def int_amdgcn_sad_u8 :
    589   GCCBuiltin<"__builtin_amdgcn_sad_u8">,
    590   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    591 
    592 def int_amdgcn_msad_u8 :
    593   GCCBuiltin<"__builtin_amdgcn_msad_u8">,
    594   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    595 
    596 def int_amdgcn_sad_hi_u8 :
    597   GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
    598   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    599 
    600 def int_amdgcn_sad_u16 :
    601   GCCBuiltin<"__builtin_amdgcn_sad_u16">,
    602   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    603 
    604 def int_amdgcn_qsad_pk_u16_u8 :
    605   GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
    606   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
    607 
    608 def int_amdgcn_mqsad_pk_u16_u8 :
    609   GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
    610   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
    611 
    612 def int_amdgcn_mqsad_u32_u8 :
    613   GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
    614   Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
    615 
    616 def int_amdgcn_cvt_pk_u8_f32 :
    617   GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
    618   Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
    619 
    620 def int_amdgcn_icmp :
    621   Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
    622             [IntrNoMem, IntrConvergent]>;
    623 
    624 def int_amdgcn_fcmp :
    625   Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
    626             [IntrNoMem, IntrConvergent]>;
    627 
    628 def int_amdgcn_readfirstlane :
    629   GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
    630   Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
    631 
    632 def int_amdgcn_readlane :
    633   GCCBuiltin<"__builtin_amdgcn_readlane">,
    634   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
    635 
    636 //===----------------------------------------------------------------------===//
    637 // CI+ Intrinsics
    638 //===----------------------------------------------------------------------===//
    639 
    640 def int_amdgcn_s_dcache_inv_vol :
    641   GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
    642   Intrinsic<[], [], []>;
    643 
    644 def int_amdgcn_buffer_wbinvl1_vol :
    645   GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
    646   Intrinsic<[], [], []>;
    647 
    648 //===----------------------------------------------------------------------===//
    649 // VI Intrinsics
    650 //===----------------------------------------------------------------------===//
    651 
    652 // llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
    653 def int_amdgcn_mov_dpp :
    654   Intrinsic<[llvm_anyint_ty],
    655             [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
    656              llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
    657 
    658 def int_amdgcn_s_dcache_wb :
    659   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
    660   Intrinsic<[], [], []>;
    661 
    662 def int_amdgcn_s_dcache_wb_vol :
    663   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
    664   Intrinsic<[], [], []>;
    665 
    666 def int_amdgcn_s_memrealtime :
    667   GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
    668   Intrinsic<[llvm_i64_ty], [], []>;
    669 
    670 // llvm.amdgcn.ds.permute <index> <src>
    671 def int_amdgcn_ds_permute :
    672   GCCBuiltin<"__builtin_amdgcn_ds_permute">,
    673   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
    674 
    675 // llvm.amdgcn.ds.bpermute <index> <src>
    676 def int_amdgcn_ds_bpermute :
    677   GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
    678   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
    679 
    680 
    681 //===----------------------------------------------------------------------===//
    682 // Special Intrinsics for backend internal use only. No frontend
    683 // should emit calls to these.
    684 //===----------------------------------------------------------------------===//
    685 def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
    686   [llvm_i1_ty], [IntrConvergent]
    687 >;
    688 
    689 def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
    690   [llvm_i64_ty], [IntrConvergent]
    691 >;
    692 
    693 def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
    694   [llvm_i64_ty], [IntrNoMem, IntrConvergent]
    695 >;
    696 
    697 def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
    698   [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
    699 >;
    700 
    701 def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
    702   [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
    703 >;
    704 
    705 def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
    706   [llvm_i64_ty], [IntrConvergent]
    707 >;
    708 
    709 def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
    710 
    711 // Represent unreachable in a divergent region.
    712 def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
    713 
    714 // Emit 2.5 ulp, no denormal division. Should only be inserted by
    715 // a pass based on !fpmath metadata.
    716 def int_amdgcn_fdiv_fast : Intrinsic<
    717   [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
    718 >;
    719 }
    720