//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AMDGPU-specific intrinsics.
//
//===----------------------------------------------------------------------===//

class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;

let TargetPrefix = "r600" in {

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;

} // End TargetPrefix = "r600"

let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_dispatch_ptr :
  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;
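// Illustrative use of int_amdgcn_kernarg_segment_ptr from IR (hypothetical
// offset; the real layout depends on the kernel signature). The returned
// pointer is the base of the kernel argument segment in constant address
// space 2:
//   %kernarg  = call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
//   %arg.gep  = getelementptr i8, i8 addrspace(2)* %kernarg, i64 8
//   %arg.ptr  = bitcast i8 addrspace(2)* %arg.gep to i32 addrspace(2)*
//   %arg      = load i32, i32 addrspace(2)* %arg.ptr, align 4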

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent]>;
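// Worked example of the extraction above (illustrative values): with
// input = 0x00001A00 and bitoffset = 8, thread_count = (0x1A00 >> 8) & 0x7f
// = 26, i.e. (assuming the usual lowering) the low 26 lanes of EXEC end up
// enabled.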


//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is the s_sendmsg immediate (i16);
// the second one is copied to m0.
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Constant to select between the first and second operand
  //                (0 = first, 1 = second).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable]
>;
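// The two results come back as an aggregate; e.g. the f32 variant would be
// declared roughly as (illustrative):
//   declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)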

def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pkrtz : Intrinsic<
  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;
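// The i32 operand of llvm.amdgcn.class is a mask of IEEE class bits in the
// V_CMP_CLASS encoding (bit 0 = signaling NaN, bit 1 = quiet NaN, ...), so a
// mask of 3 tests for any NaN (illustrative):
//   %is.nan = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)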

def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;


// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
  LLVMMatchType<0>,
  llvm_i32_ty, // ordering
  llvm_i32_ty, // scope
  llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, NoCapture<0>]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
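// With the overloaded return and pointer types mangled into the name, the LDS
// (addrspace 3) i32 variant would be declared roughly as (illustrative; the
// trailing i32/i32/i1 operands encode ordering, scope and volatility as
// listed above):
//   declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)*, i32, i32, i32, i1)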

class AMDGPUImageLoad : Intrinsic <
  [llvm_anyfloat_ty], // vdata(VGPR)
  [llvm_anyint_ty,    // vaddr(VGPR)
   llvm_anyint_ty,    // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty,        // slc(imm)
   llvm_i1_ty,        // lwe(imm)
   llvm_i1_ty],       // da(imm)
  [IntrReadMem]>;

def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
def int_amdgcn_image_getresinfo : AMDGPUImageLoad;

class AMDGPUImageStore : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_anyint_ty,    // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty,        // slc(imm)
   llvm_i1_ty,        // lwe(imm)
   llvm_i1_ty],       // da(imm)
  []>;

def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;

class AMDGPUImageSample : Intrinsic <
    [llvm_anyfloat_ty], // vdata(VGPR)
    [llvm_anyfloat_ty,  // vaddr(VGPR)
     llvm_anyint_ty,    // rsrc(SGPR)
     llvm_v4i32_ty,     // sampler(SGPR)
     llvm_i32_ty,       // dmask(imm)
     llvm_i1_ty,        // unorm(imm)
     llvm_i1_ty,        // glc(imm)
     llvm_i1_ty,        // slc(imm)
     llvm_i1_ty,        // lwe(imm)
     llvm_i1_ty],       // da(imm)
     [IntrReadMem]>;
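// The sample intrinsics are overloaded on the data, address and resource
// types, which are mangled into the name; a basic sample returning four
// floats might look roughly like (illustrative):
//   call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(
//       <4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sampler,
//       i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)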

// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
def int_amdgcn_image_sample_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_d : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_l : AMDGPUImageSample;
def int_amdgcn_image_sample_b : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;

// Sample with comparison
def int_amdgcn_image_sample_c : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;

// Sample with offsets
def int_amdgcn_image_sample_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;

// Sample with comparison and offsets
def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;

// Basic gather4
def int_amdgcn_image_gather4 : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz : AMDGPUImageSample;

// Gather4 with comparison
def int_amdgcn_image_gather4_c : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;

// Gather4 with offsets
def int_amdgcn_image_gather4_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;

// Gather4 with comparison and offsets
def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;

def int_amdgcn_image_getlod : AMDGPUImageSample;

class AMDGPUImageAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_cmpswap : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

class AMDGPUBufferLoad : Intrinsic <
  [llvm_anyfloat_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
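// Overloaded on the returned data type; loading four dwords through a buffer
// descriptor would be roughly (illustrative):
//   %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(
//       <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 false, i1 false)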

class AMDGPUBufferStore : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

class AMDGPUBufferAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  []
>;
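// The en operand is a per-component write mask (bit 0 covers src0 ... bit 3
// covers src3). A full export of one vec4 to target 0 (MRT0) might look like
// (illustrative):
//   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %g,
//                                  float %b, float %a, i1 true, i1 false)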

// exp with the compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  []
>;

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrReadMem, IntrSpeculatable]
>;

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest do
// not cross a 4GB address boundary. Using it for any other purpose may
// not produce the desired results, as optimizations may cause code
// movement, especially since we explicitly use IntrNoMem to allow
// optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from LDS, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
// See int_amdgcn_interp_p1 for why this is IntrNoMem.
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;
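// A complete interpolation of one attribute channel is the two-step sequence
// (illustrative):
//   %p1 = call float @llvm.amdgcn.interp.p1(float %i, i32 %chan, i32 %attr, i32 %m0)
//   %v  = call float @llvm.amdgcn.interp.p2(float %p1, float %j, i32 %chan, i32 %attr, i32 %m0)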

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
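// The usual pattern for computing a workitem's lane index within a wave64 is
// (illustrative):
//   %lo   = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
//   %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)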

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;
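// ubfe/sbfe extract a 'width'-bit field starting at 'offset', zero- or
// sign-extending the result respectively. For example (illustrative):
//   llvm.amdgcn.ubfe.i32(0xFF00, 8, 4) == (0xFF00 >> 8) & 0xF == 0xF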

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_icmp :
  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent]>;
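// The trailing i32 operand is a CmpInst predicate code, and the result is a
// bitmask of the lanes for which the comparison holds (a "ballot"); e.g.,
// assuming 32 is still CmpInst::ICMP_EQ (illustrative):
//   %ballot = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 %y, i32 32)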

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], []>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
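// The ds_bpermute index is a byte address, so reading the value held by lane N
// is typically done with index = N * 4 (illustrative):
//   %idx = shl i32 %lane, 2
//   %val = call i32 @llvm.amdgcn.ds.bpermute(i32 %idx, i32 %src)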


//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
  [llvm_i1_ty], [IntrConvergent]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
  [llvm_i64_ty], [IntrConvergent]
>;

def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
  [llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
  [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
  [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_i64_ty], [IntrConvergent]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit a 2.5 ulp, no-denormal division. Should only be inserted by a
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;
} // End TargetPrefix = "amdgcn"