//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AMDGPU-specific (r600 and amdgcn) intrinsics.
//
//===----------------------------------------------------------------------===//

class AMDGPUReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;

let TargetPrefix = "r600" in {

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
  def _x : AMDGPUReadPreloadRegisterIntrinsic;
  def _y : AMDGPUReadPreloadRegisterIntrinsic;
  def _z : AMDGPUReadPreloadRegisterIntrinsic;
}

multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                                 <"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                             <"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                          <"__builtin_r600_read_tgid">;

defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;

def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
  GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_r600_rat_store_typed :
  // 1st parameter: Data
  // 2nd parameter: Index
  // 3rd parameter: Constant RAT ID
  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
  GCCBuiltin<"__builtin_r600_rat_store_typed">;

def int_r600_recipsqrt_ieee :  Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_recipsqrt_clamped : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_r600_cube : Intrinsic<
  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;

} // End TargetPrefix = "r600"

let TargetPrefix = "amdgcn" in {

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//

defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
                               <"__builtin_amdgcn_workgroup_id">;

def int_amdgcn_dispatch_ptr :
  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_queue_ptr :
  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_kernarg_segment_ptr :
  GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_implicitarg_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_groupstaticsize :
  GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_dispatch_id :
  GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_implicit_buffer_ptr :
  GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
  [IntrNoMem, IntrSpeculatable]>;

// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec : Intrinsic<[],
  [llvm_i64_ty],      // 64-bit literal constant
  [IntrConvergent]>;

// Set EXEC according to a thread count packed in an SGPR input:
//    thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
  [llvm_i32_ty,       // 32-bit SGPR input
   llvm_i32_ty],      // bit offset of the thread count
  [IntrConvergent]>;
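
// Example IR-level uses of the two EXEC initializers above (illustrative
// only; the operand values are hypothetical):
//   call void @llvm.amdgcn.init.exec(i64 -1)             ; enable all 64 lanes
//   call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
// For the second form, if %count holds 0x1980 and bitoffset is 8, then
// thread_count = (0x1980 >> 8) & 0x7f = 25, so lanes 0..24 are enabled.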

//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is the s_sendmsg immediate (i16);
// the second one is copied to m0.
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
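
// A sketch of the IR-level form (the message encoding is an immediate whose
// meaning is target-defined; the values shown are placeholders):
//   call void @llvm.amdgcn.s.sendmsg(i32 <msg-imm>, i32 <m0-value>)
//   call void @llvm.amdgcn.s.sendmsghalt(i32 <msg-imm>, i32 <m0-value>)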

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
  Intrinsic<[], [], [IntrConvergent]>;

def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_div_scale : Intrinsic<
  // 1st parameter: Numerator
  // 2nd parameter: Denominator
  // 3rd parameter: Constant to select between the first and second operand
  //                (0 = first, 1 = second).
  [llvm_anyfloat_ty, llvm_i1_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable]
>;
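
// Illustrative call of the f32 overload; the two results come back as a
// struct (the scaled value plus the i1 flag):
//   %r = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %num, float %den, i1 false)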

def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_trig_preop : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sin : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cos : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_log_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
  Intrinsic<[llvm_float_ty], [llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
  Intrinsic<
  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_rsq_clamp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;

def int_amdgcn_ldexp : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_mant : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_frexp_exp : Intrinsic<
  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
>;

// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pkrtz : Intrinsic<
  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
  Intrinsic<[llvm_anyfloat_ty],
    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
  Intrinsic<[llvm_float_ty],
    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [IntrNoMem, IntrSpeculatable]
>;

// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
  [IntrNoMem, IntrSpeculatable]
>;
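
// For reference, a sketch of the signed vs. unsigned forms in IR:
//   %s = call i32 @llvm.amdgcn.sffbh.i32(i32 %x)      ; v_ffbh_i32
//   %u = call i32 @llvm.ctlz.i32(i32 %x, i1 false)    ; selects v_ffbh_u32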

// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
  [llvm_anyptr_ty,
  LLVMMatchType<0>,
  llvm_i32_ty, // ordering
  llvm_i32_ty, // scope
  llvm_i1_ty], // isVolatile
  [IntrArgMemOnly, NoCapture<0>]
>;

def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
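
// The overloads are mangled on the result type and the pointer type. A sketch
// of a local-memory i32 increment (the ordering/scope/volatile operands are
// shown as placeholders, since their encodings mirror atomicrmw):
//   %old = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(
//              i32 addrspace(3)* %ptr, i32 %val, i32 <ordering>, i32 <scope>, i1 <volatile>)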

class AMDGPUImageLoad : Intrinsic <
  [llvm_anyfloat_ty], // vdata(VGPR)
  [llvm_anyint_ty,    // vaddr(VGPR)
   llvm_anyint_ty,    // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty,        // slc(imm)
   llvm_i1_ty,        // lwe(imm)
   llvm_i1_ty],       // da(imm)
  [IntrReadMem]>;

def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
def int_amdgcn_image_getresinfo : AMDGPUImageLoad;
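
// These are overloaded on the data, address and resource types, with the
// suffixes appended in that order. An illustrative load of all four
// components (dmask = 0xf); the vector widths are only one possible choice:
//   %v = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(
//            <4 x i32> %vaddr, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)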

class AMDGPUImageStore : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_anyint_ty,    // rsrc(SGPR)
   llvm_i32_ty,       // dmask(imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty,        // slc(imm)
   llvm_i1_ty,        // lwe(imm)
   llvm_i1_ty],       // da(imm)
  []>;

def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;

class AMDGPUImageSample : Intrinsic <
    [llvm_anyfloat_ty], // vdata(VGPR)
    [llvm_anyfloat_ty,  // vaddr(VGPR)
     llvm_anyint_ty,    // rsrc(SGPR)
     llvm_v4i32_ty,     // sampler(SGPR)
     llvm_i32_ty,       // dmask(imm)
     llvm_i1_ty,        // unorm(imm)
     llvm_i1_ty,        // glc(imm)
     llvm_i1_ty,        // slc(imm)
     llvm_i1_ty,        // lwe(imm)
     llvm_i1_ty],       // da(imm)
     [IntrReadMem]>;

// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
def int_amdgcn_image_sample_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_d : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_l : AMDGPUImageSample;
def int_amdgcn_image_sample_b : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;

// Sample with comparison
def int_amdgcn_image_sample_c : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;

// Sample with offsets
def int_amdgcn_image_sample_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;

// Sample with comparison and offsets
def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;

// Basic gather4
def int_amdgcn_image_gather4 : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz : AMDGPUImageSample;

// Gather4 with comparison
def int_amdgcn_image_gather4_c : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;

// Gather4 with offsets
def int_amdgcn_image_gather4_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;

// Gather4 with comparison and offsets
def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;

def int_amdgcn_image_getlod : AMDGPUImageSample;

class AMDGPUImageAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_cmpswap : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_anyint_ty,    // vaddr(VGPR)
   llvm_v8i32_ty,     // rsrc(SGPR)
   llvm_i1_ty,        // r128(imm)
   llvm_i1_ty,        // da(imm)
   llvm_i1_ty],       // slc(imm)
  []>;

class AMDGPUBufferLoad : Intrinsic <
  [llvm_anyfloat_ty],
  [llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrReadMem]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
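
// Both loads are overloaded on the return type only. Illustrative call:
//   %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(
//               <4 x i32> %rsrc, i32 %vindex, i32 %offset, i1 false, i1 false)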

class AMDGPUBufferStore : Intrinsic <
  [],
  [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty,        // glc(imm)
   llvm_i1_ty],       // slc(imm)
  [IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;

def int_amdgcn_tbuffer_load : Intrinsic <
    [llvm_any_ty],    // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
    [llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,      // glc(imm)
     llvm_i1_ty],     // slc(imm)
    []>;

def int_amdgcn_tbuffer_store : Intrinsic <
    [],
    [llvm_any_ty,     // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
     llvm_v4i32_ty,   // rsrc(SGPR)
     llvm_i32_ty,     // vindex(VGPR)
     llvm_i32_ty,     // voffset(VGPR)
     llvm_i32_ty,     // soffset(SGPR)
     llvm_i32_ty,     // offset(imm)
     llvm_i32_ty,     // dfmt(imm)
     llvm_i32_ty,     // nfmt(imm)
     llvm_i1_ty,      // glc(imm)
     llvm_i1_ty],     // slc(imm)
    []>;
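
// Illustrative calls; the overload suffix tracks the data type and the dfmt/
// nfmt immediates are shown as placeholders:
//   %d = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc,
//            i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 <dfmt>, i32 <nfmt>, i1 false, i1 false)
//   call void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float> %d, <4 x i32> %rsrc,
//            i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 <dfmt>, i32 <nfmt>, i1 false, i1 false)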

class AMDGPUBufferAtomic : Intrinsic <
  [llvm_i32_ty],
  [llvm_i32_ty,       // vdata(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
  [llvm_i32_ty],
  [llvm_i32_ty,       // src(VGPR)
   llvm_i32_ty,       // cmp(VGPR)
   llvm_v4i32_ty,     // rsrc(SGPR)
   llvm_i32_ty,       // vindex(VGPR)
   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
   llvm_i1_ty],       // slc(imm)
  []>;

// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_any_ty,       // src0 (f32 or i32)
  LLVMMatchType<0>,  // src1
  LLVMMatchType<0>,  // src2
  LLVMMatchType<0>,  // src3
  llvm_i1_ty,        // done
  llvm_i1_ty         // vm
  ],
  []
>;
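
// An illustrative export of one f32 MRT color (tgt 0, all four channels
// enabled, done bit set):
//   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %g,
//                                  float %b, float %a, i1 true, i1 false)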

// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
  llvm_i32_ty,       // tgt
  llvm_i32_ty,       // en
  llvm_anyvector_ty, // src0 (v2f16 or v2i16)
  LLVMMatchType<0>,  // src1
  llvm_i1_ty,        // done
  llvm_i1_ty],       // vm
  []
>;

def int_amdgcn_buffer_wbinvl1_sc :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1 :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_inv :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memtime :
  GCCBuiltin<"__builtin_amdgcn_s_memtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

def int_amdgcn_s_sleep :
  GCCBuiltin<"__builtin_amdgcn_s_sleep">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_incperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_decperflevel :
  GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
  Intrinsic<[], [llvm_i32_ty], []>;

def int_amdgcn_s_getreg :
  GCCBuiltin<"__builtin_amdgcn_s_getreg">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
  [IntrReadMem, IntrSpeculatable]
>;

// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest do
// not cross a 4GB address boundary. Use for any other purpose may not
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
  GCCBuiltin<"__builtin_amdgcn_s_getpc">,
  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;
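
// A sketch of the intended use: read the PC, then keep only the high half,
// relying on the 4GB-window convention described above.
//   %pc = call i64 @llvm.amdgcn.s.getpc()
//   %hi = lshr i64 %pc, 32        ; high part of the code address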

// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
  GCCBuiltin<"__builtin_amdgcn_interp_mov">,
  Intrinsic<[llvm_float_ty],
            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
  GCCBuiltin<"__builtin_amdgcn_interp_p1">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;

// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
// See int_amdgcn_interp_p1 for why this is IntrNoMem.
def int_amdgcn_interp_p2 :
  GCCBuiltin<"__builtin_amdgcn_interp_p2">,
  Intrinsic<[llvm_float_ty],
            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
            [IntrNoMem, IntrSpeculatable]>;
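
// The usual two-step interpolation sequence (illustrative; attr_chan and attr
// are shown as 0, and the last operand is the value placed in m0):
//   %p1 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %m0)
//   %v  = call float @llvm.amdgcn.interp.p2(float %p1, float %j, i32 0, i32 0, i32 %m0)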

// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
  [llvm_i1_ty],
  [],
  [IntrNoMem]>;

def int_amdgcn_mbcnt_lo :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

def int_amdgcn_mbcnt_hi :
  GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_lerp :
  GCCBuiltin<"__builtin_amdgcn_lerp">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_msad_u8 :
  GCCBuiltin<"__builtin_amdgcn_msad_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_hi_u8 :
  GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_sad_u16 :
  GCCBuiltin<"__builtin_amdgcn_sad_u16">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_qsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_pk_u16_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mqsad_u32_u8 :
  GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u8_f32 :
  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_icmp :
  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent]>;

def int_amdgcn_fcmp :
  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
            [IntrNoMem, IntrConvergent]>;

def int_amdgcn_readfirstlane :
  GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
  GCCBuiltin<"__builtin_amdgcn_readlane">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;

// Copies the active channels of the source value to the destination value,
// with the guarantee that the source value is computed as if the entire
// program were executed in Whole Wavefront Mode, i.e. with all channels
// enabled, with a few exceptions:
//  - Phi nodes which require WWM return an undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
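
// Illustrative uses; both are overloaded on the copied type:
//   %x.wqm = call float @llvm.amdgcn.wqm.f32(float %x)
//   %x.wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %x)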

// Given a value, copies it while setting all the inactive lanes to a given
// value. Note that OpenGL helper lanes are considered active, so if the
// program ever uses WQM, then the instruction and the first source will be
// computed in WQM.
def int_amdgcn_set_inactive :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>,  // value to be copied
             LLVMMatchType<0>], // value for the inactive lanes to take
            [IntrNoMem, IntrConvergent]>;
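
// For example, copying %v while forcing the inactive lanes to zero (a sketch
// of a typical use as the identity value before a wave-wide reduction):
//   %masked = call i32 @llvm.amdgcn.set.inactive.i32(i32 %v, i32 0)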

//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//

def int_amdgcn_s_dcache_inv_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_buffer_wbinvl1_vol :
  GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
  Intrinsic<[], [], []>;

//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//

// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
             llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
// v_mov_b32 <dest> <old>
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
  Intrinsic<[llvm_anyint_ty],
            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
             llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;

def int_amdgcn_s_dcache_wb :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_dcache_wb_vol :
  GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
  Intrinsic<[], [], []>;

def int_amdgcn_s_memrealtime :
  GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
  Intrinsic<[llvm_i64_ty], [], []>;

// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
  GCCBuiltin<"__builtin_amdgcn_ds_permute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;

// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
  GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
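
// The permute index is interpreted as a byte address, so selecting a source
// lane typically multiplies the lane id by 4 (a sketch, not the only usage):
//   %idx = shl i32 %lane, 2
//   %val = call i32 @llvm.amdgcn.ds.bpermute(i32 %idx, i32 %src)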

//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//

def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
  [llvm_i1_ty], [IntrConvergent]
>;

def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
  [llvm_i64_ty], [IntrConvergent]
>;

def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
  [llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
  [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
  [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;

def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
  [llvm_i64_ty], [IntrConvergent]
>;

def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;

// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;

// Emit a 2.5 ulp, no-denormal division. Should only be inserted by a
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

} // End TargetPrefix = "amdgcn"