Home | History | Annotate | Download | only in AMDGPU
      1 //===-- CIInstructions.td - CI Instruction Definitions --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // Instruction definitions for CI and newer.
     10 //===----------------------------------------------------------------------===//
     11 // Remaining instructions:
     12 // S_CBRANCH_CDBGUSER
     13 // S_CBRANCH_CDBGSYS
     14 // S_CBRANCH_CDBGSYS_OR_USER
     15 // S_CBRANCH_CDBGSYS_AND_USER
     16 // DS_NOP
     17 // DS_GWS_SEMA_RELEASE_ALL
     18 // DS_WRAP_RTN_B32
     19 // DS_CONDXCHG32_RTN_B64
     20 // DS_WRITE_B96
     21 // DS_WRITE_B128
     22 // DS_CONDXCHG32_RTN_B128
     23 // DS_READ_B96
     24 // DS_READ_B128
     25 // BUFFER_LOAD_DWORDX3
     26 // BUFFER_STORE_DWORDX3
     27 
     28 //===----------------------------------------------------------------------===//
     29 // VOP1 Instructions
     30 //===----------------------------------------------------------------------===//
     31 
     32 let SubtargetPredicate = isCIVI in {
     33 
     34 let SchedRW = [WriteDoubleAdd] in {
        // Double-precision rounding instructions. Every definition in this scope
        // uses the VOP_F64_F64 profile and inherits SchedRW = [WriteDoubleAdd].
        // The trailing argument (ftrunc / fceil / ffloor / frint) is presumably the
        // DAG node each instruction is selected from -- confirm against VOP1Inst.
        // Only one opcode is passed to vop1<> here, unlike the two-opcode form used
        // by the legacy log/exp definitions that follow this scope.
        // Note the listing order: V_FLOOR_F64 (0x1A) precedes V_RNDNE_F64 (0x19).
     35 defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
     36   VOP_F64_F64, ftrunc
     37 >;
     38 defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
     39   VOP_F64_F64, fceil
     40 >;
     41 defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
     42   VOP_F64_F64, ffloor
     43 >;
     44 defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
     45   VOP_F64_F64, frint
     46 >;
     47 } // End SchedRW = [WriteDoubleAdd]
     48 
     49 let SchedRW = [WriteQuarterRate32] in {
        // Legacy single-precision log/exp. Both use the VOP_F32_F32 profile and
        // inherit SchedRW = [WriteQuarterRate32]. No selection node is passed to
        // VOP1Inst for either of them.
        // vop1<> takes two opcodes here; presumably the first is the SI/CI encoding
        // and the second the VI encoding -- confirm against the vop1 class.
        // The second values run in the opposite order to the first
        // (log: 0x45 / 0x4c, exp: 0x46 / 0x4b).
     50 defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
     51   VOP_F32_F32
     52 >;
     53 defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
     54   VOP_F32_F32
     55 >;
     56 } // End SchedRW = [WriteQuarterRate32]
     57 
     58 //===----------------------------------------------------------------------===//
     59 // VOP3 Instructions
     60 //===----------------------------------------------------------------------===//
        // The three *QSAD* definitions below pass only a profile (VOP_I32_I32_I32)
        // and no selection node. Their opcodes are not in ascending order
        // (0x173, 0x172, 0x175).
        // NOTE(review): the mnemonics suggest wider / packed operands than three
        // 32-bit integers -- verify the VOP_I32_I32_I32 profile against the ISA
        // manual before building patterns or operand checks on top of it.
     61 
     62 defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
     63   VOP_I32_I32_I32
     64 >;
     65 defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
     66   VOP_I32_I32_I32
     67 >;
     68 defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
     69   VOP_I32_I32_I32
     70 >;
     71 
     72 let isCommutable = 1 in {
        // 64-bit multiply-add instructions (by mnemonic), in an unsigned and a
        // signed variant. Both use the VOP_I64_I32_I32_I64 profile, pass no
        // selection node, and are marked commutable by the enclosing let.
     73 defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
     74   VOP_I64_I32_I32_I64
     75 >;
     76 
     77 // XXX - Does this set VCC?
        // (Still unresolved: no `Defs = [VCC]` is set anywhere in this scope, so if
        // the hardware does write VCC, that is not modelled by these definitions.)
     78 defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
     79   VOP_I64_I32_I32_I64
     80 >;
     81 } // End isCommutable = 1
     82 
     83 
     84 //===----------------------------------------------------------------------===//
     85 // DS Instructions
     86 //===----------------------------------------------------------------------===//
        // DS_WRAP_RTN_F32 is the only DS instruction defined in this section:
        // opcode 0x34, VGPR_32 register class. The last argument ("ds_wrap_f32") is
        // presumably the name of the matching no-return form -- confirm against
        // DS_1A1D_RET.
        // NOTE(review): the "Remaining instructions" list at the top of this file
        // names DS_WRAP_RTN_B32, not _F32 -- check which mnemonic the ISA uses.
     87 defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
     88 
        // Not yet defined (also listed under "Remaining instructions"):
     89 // DS_CONDXCHG32_RTN_B64
     90 // DS_CONDXCHG32_RTN_B128
     91 
     92 //===----------------------------------------------------------------------===//
     93 // SMRD Instructions
     94 //===----------------------------------------------------------------------===//
        // s_dcache_inv_vol, built from SMRD_Inval and bound to the
        // int_amdgcn_s_dcache_inv_vol intrinsic.
        // smrd<> receives two opcodes (0x1d, 0x22); presumably the SI/CI and VI
        // encodings respectively -- confirm against the smrd class.
     95 
     96 defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
     97   "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
     98 
     99 //===----------------------------------------------------------------------===//
    100 // MUBUF Instructions
    101 //===----------------------------------------------------------------------===//
        // buffer_wbinvl1_vol, built from MUBUF_Invalidate and bound to the
        // int_amdgcn_buffer_wbinvl1_vol intrinsic. mubuf<> receives two opcodes
        // (0x70, 0x3f); presumably the SI/CI and VI encodings -- confirm.
    102 
    103 let DisableSIDecoder = 1 in {
        // NOTE(review): DisableSIDecoder is set for this definition only;
        // presumably the 0x70 encoding would otherwise collide with, or be wrongly
        // accepted by, the SI disassembler table -- confirm before removing.
    104 defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
    105   "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
    106 >;
    107 } // End DisableSIDecoder = 1
    108 
    109 //===----------------------------------------------------------------------===//
    110 // Flat Instructions
    111 //===----------------------------------------------------------------------===//
        // Flat loads and stores. Each defm passes flat<op[, op2]>, the assembly
        // mnemonic, and a register class: VGPR_32 for byte/short/dword accesses,
        // VReg_64 / VReg_96 / VReg_128 for the x2 / x3 / x4 forms.
        // flat<> takes one or two opcodes; presumably <CI, VI>, with the VI value
        // defaulting to the CI one when omitted -- confirm against the flat class.
    112 
    113 defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
    114   flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
    115 >;
    116 defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
    117   flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
    118 >;
    119 defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
    120   flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
    121 >;
    122 defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
    123   flat<0xb, 0x13>, "flat_load_sshort", VGPR_32
    124 >;
    125 defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
    126   flat<0xc, 0x14>, "flat_load_dword", VGPR_32
    127 >;
    128 defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
    129   flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
    130 >;
        // NOTE: DWORDX4 is listed before DWORDX3, and their second opcodes are
        // crossed relative to the first (x4: 0xe / 0x17, x3: 0xf / 0x16).
    131 defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
    132   flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
    133 >;
    134 defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
    135   flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
    136 >;
        // Stores. BYTE / SHORT / DWORD / DWORDX2 pass a single opcode. DWORDX4 and
        // DWORDX3 pass two, and the pair is swapped between them
        // (x4: 0x1e / 0x1f, x3: 0x1f / 0x1e). This mirrors the crossed opcodes on
        // the load side, so it looks deliberate -- check the ISA documentation
        // before "correcting" it.
    137 defm FLAT_STORE_BYTE : FLAT_Store_Helper <
    138   flat<0x18>, "flat_store_byte", VGPR_32
    139 >;
    140 defm FLAT_STORE_SHORT : FLAT_Store_Helper <
    141   flat<0x1a>, "flat_store_short", VGPR_32
    142 >;
    143 defm FLAT_STORE_DWORD : FLAT_Store_Helper <
    144   flat<0x1c>, "flat_store_dword", VGPR_32
    145 >;
    146 defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
    147   flat<0x1d>, "flat_store_dwordx2", VReg_64
    148 >;
    149 defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
    150   flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
    151 >;
    152 defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
    153   flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
    154 >;
    155 defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
          // Argument layout shared by every 32-bit flat atomic from here down to
          // FLAT_ATOMIC_DEC: flat<op, op2>, mnemonic, register class (VGPR_32),
          // value type (i32), atomic node (atomic_*_flat).
    156   flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
    157 >;
        // CMPSWAP passes two extra arguments (v2i32, VReg_64); presumably the type
        // and register class of the combined data/compare operand -- confirm
        // against the FLAT_ATOMIC multiclass.
    158 defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
    159   flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
    160     atomic_cmp_swap_flat, v2i32, VReg_64
    161 >;
    162 defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
    163   flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
    164 >;
    165 defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
    166   flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat
    167 >;
        // The first opcode jumps from 0x33 (SUB) to 0x35 (SMIN): 0x34 is not used
        // by any flat atomic in this section, while the second opcode stays
        // contiguous (0x43 -> 0x44). The signed min/max definitions use the
        // atomic_min_flat / atomic_max_flat nodes (no "s" prefix in the node name).
    168 defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
    169   flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat
    170 >;
    171 defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
    172   flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat
    173 >;
    174 defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
    175   flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat
    176 >;
    177 defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
    178   flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat
    179 >;
    180 defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
    181   flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat
    182 >;
    183 defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
    184   flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat
    185 >;
    186 defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
    187   flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat
    188 >;
    189 defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
    190   flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat
    191 >;
    192 defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
    193   flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat
    194 >;
    195 defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
          // 64-bit ("_X2") counterparts of the 32-bit flat atomics, from here down
          // to FLAT_ATOMIC_DEC_X2. Same argument layout, with VReg_64 / i64 in
          // place of VGPR_32 / i32; the atomic_*_flat nodes are the same ones the
          // 32-bit definitions use.
    196   flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat
    197 >;
        // CMPSWAP_X2 passes two extra arguments (v2i64, VReg_128); presumably the
        // type and register class of the combined data/compare operand -- confirm
        // against the FLAT_ATOMIC multiclass.
    198 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
    199   flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
    200     atomic_cmp_swap_flat, v2i64, VReg_128
    201 >;
    202 defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
    203   flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat
    204 >;
    205 defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
    206   flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat
    207 >;
        // As in the 32-bit group, the first opcode skips one value here
        // (0x53 -> 0x55) while the second stays contiguous (0x63 -> 0x64).
    208 defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
    209   flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat
    210 >;
    211 defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
    212   flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat
    213 >;
    214 defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
    215   flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat
    216 >;
    217 defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
    218   flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat
    219 >;
    220 defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
    221   flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat
    222 >;
    223 defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
    224   flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat
    225 >;
    226 defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
    227   flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat
    228 >;
    229 defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
    230   flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat
    231 >;
    232 defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
    233   flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat
    234 >;
    235 
    236 } // End SubtargetPredicate = isCIVI
    237 
    238 // CI Only flat instructions
        // Floating-point flat atomics. The enclosing let predicates them on isCI,
        // sets the VI assembler predicate to DisableInst and disables the VI
        // decoder for them.
        // Each passes a single opcode to flat<>. No atomic node is supplied: the
        // cmpswap forms pass null_frag explicitly (followed by the vector type and
        // wider register class), the fmin / fmax forms omit the argument.
        // NOTE(review): 0x40 and 0x60 are also the second opcodes of
        // FLAT_ATOMIC_SWAP / FLAT_ATOMIC_SWAP_X2 in the CI/VI section, which is
        // presumably why the VI side has to be disabled here -- confirm.
    239 
    240 let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in {
    241 
    242 defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
    243   flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
    244     null_frag, v2f32, VReg_64
    245 >;
    246 defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
    247   flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32
    248 >;
    249 defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
    250   flat<0x40>, "flat_atomic_fmax", VGPR_32, f32
    251 >;
    252 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
    253   flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
    254   null_frag, v2f64, VReg_128
    255 >;
    256 defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
    257   flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64
    258 >;
    259 defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
    260   flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64
    261 >;
    262 
    263 } // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
    264 
    265 //===----------------------------------------------------------------------===//
    266 // Flat Patterns
    267 //===----------------------------------------------------------------------===//
    268 
    269 let Predicates = [isCIVI] in {
    270 
    271 // Patterns for global loads with no offset.
        // FlatLoadPat selects `inst` for a load `node` producing `vt` from a 64-bit
        // address, and passes three zero immediates after the address.
        // NOTE(review): the immediates are presumably the glc / slc / tfe modifier
        // bits -- confirm against the operand list of FLAT_Load_Helper.
    272 class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
    273   (vt (node i64:$addr)),
    274   (inst $addr, 0, 0, 0)
    275 >;
    276 
    277 class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
          // Same shape as FlatLoadPat, except that the first immediate after the
          // address is 1 instead of 0.
    278   (vt (node i64:$addr)),
    279   (inst $addr, 1, 0, 0)
    280 >;
    281 
        // Extending byte/short loads all produce i32; plain flat_load maps to
        // DWORD / DWORDX2 / DWORDX4 by result type (i32, v2i32, v4i32). No pattern
        // is listed here for FLAT_LOAD_DWORDX3.
    282 def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
    283 def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
    284 def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
    285 def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
    286 def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
    287 def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
    288 def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
    289 
        // Atomic loads: only the i32 and i64 widths are covered. Note that the
        // DWORDX2 atomic load is typed i64, whereas the plain DWORDX2 load above is
        // typed v2i32.
    290 def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
    291 def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
    292 
    293 
    294 class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
          // Plain store: the node takes (data, addr); the instruction takes
          // (addr, data) followed by three zero immediates.
    295   (node vt:$data, i64:$addr),
    296   (inst $addr, $data, 0, 0, 0)
    297 >;
    298 
    299 class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
    300   // atomic store follows atomic binop convention so the address comes
    301   // first.
          // Apart from the operand order on the node side, the only difference
          // from FlatStorePat is that the first immediate is 1 instead of 0.
    302   (node i64:$addr, vt:$data),
    303   (inst $addr, $data, 1, 0, 0)
    304 >;
    305 
        // Truncating byte/short stores take an i32 source; plain flat_store maps to
        // DWORD / DWORDX2 / DWORDX4 by data type (i32, v2i32, v4i32). No pattern is
        // listed here for FLAT_STORE_DWORDX3.
    306 def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
    307 def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
    308 def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
    309 def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
    310 def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
    311 
        // Atomic stores: i32 and i64 only (DWORDX2 is typed i64 here, v2i32 above).
    312 def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
    313 def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
    314 
    315 class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
    316                      ValueType data_vt = vt> : Pat <
          // Returning atomic: the node takes (addr, data) and yields `vt`; the
          // instruction receives (addr, data) followed by two zero immediates.
          // `data_vt` defaults to `vt` and only differs for compare-and-swap,
          // which passes a two-element vector type as its data operand.
    317   (vt (node i64:$addr, data_vt:$data)),
    318   (inst $addr, $data, 0, 0)
    319 >;
    320 
        // 32-bit instantiations. They reference the *_RTN instruction names
        // (presumably generated by the FLAT_ATOMIC multiclass -- confirm) and the
        // atomic_*_global nodes, not the atomic_*_flat nodes passed to the defms.
        // The listing order differs from the order of the instruction definitions.
    321 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
    322 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
    323 def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
    324 def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
    325 def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
    326 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
    327 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
    328 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
    329 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
    330 def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
    331 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
    332 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
    333 def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
    334 
        // 64-bit instantiations: same node list as above, with the _X2_RTN
        // instructions and i64 (v2i64 data for compare-and-swap).
    335 def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
    336 def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
    337 def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
    338 def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
    339 def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
    340 def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
    341 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
    342 def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
    343 def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
    344 def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
    345 def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
    346 def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
    347 def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
    348 
    349 } // End Predicates = [isCIVI]
    350