Home | History | Annotate | Download | only in AMDGPU
      1 //===-- CIInstructions.td - CI Instruction Definitions --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // Instruction definitions for CI and newer.
     10 //===----------------------------------------------------------------------===//
     11 // Remaining instructions:
     12 // S_CBRANCH_CDBGUSER
     13 // S_CBRANCH_CDBGSYS
     14 // S_CBRANCH_CDBGSYS_OR_USER
     15 // S_CBRANCH_CDBGSYS_AND_USER
     16 // DS_NOP
     17 // DS_GWS_SEMA_RELEASE_ALL
     18 // DS_WRAP_RTN_B32
     19 // DS_CONDXCHG32_RTN_B64
     20 // DS_WRITE_B96
     21 // DS_WRITE_B128
     22 // DS_CONDXCHG32_RTN_B128
     23 // DS_READ_B96
     24 // DS_READ_B128
     25 // BUFFER_LOAD_DWORDX3
     26 // BUFFER_STORE_DWORDX3
     27 
     28 
     29 def isCIVI : Predicate <
        // C++ condition string: true when the subtarget generation is SEA_ISLANDS
        // or VOLCANIC_ISLANDS.
     30   "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
     31   "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
        // The assembler-side predicate is keyed on the "FeatureCIInsts" string.
     32 >, AssemblerPredicate<"FeatureCIInsts">;
     33 
        // True when Subtarget->hasFlatAddressSpace() returns true. Used further
        // down in this file to guard the flat load/store selection patterns.
     34 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
     35 
     36 //===----------------------------------------------------------------------===//
     37 // VOP1 Instructions
     38 //===----------------------------------------------------------------------===//
     39 
     40 let SubtargetPredicate = isCIVI in {
     41 
     42 let SchedRW = [WriteDoubleAdd] in {
        // Four f64 VOP1 records that share the VOP_F64_F64 profile and the
        // WriteDoubleAdd scheduling class set by the enclosing let.
        // The last operand of each (ftrunc, fceil, ffloor, frint) is presumably
        // the DAG node VOP1Inst selects from - confirm against the VOP1Inst
        // multiclass.
        // Records are not listed in opcode order: 0x17, 0x18, 0x1A, 0x19.
     43 defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
     44   VOP_F64_F64, ftrunc
     45 >;
     46 defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
     47   VOP_F64_F64, fceil
     48 >;
     49 defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
     50   VOP_F64_F64, ffloor
     51 >;
     52 defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
     53   VOP_F64_F64, frint
     54 >;
     55 } // End SchedRW = [WriteDoubleAdd]
     56 
     57 let SchedRW = [WriteQuarterRate32] in {
        // v_log_legacy_f32 / v_exp_legacy_f32 records with the VOP_F32_F32 profile.
        // No pattern operand is passed to VOP1Inst for either record.
        // vop1<> receives two opcode values here (the f64 records above pass one);
        // presumably one value per encoding generation - confirm against the
        // vop1 class.
     58 defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
     59   VOP_F32_F32
     60 >;
     61 defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
     62   VOP_F32_F32
     63 >;
     64 } // End SchedRW = [WriteQuarterRate32]
     65 
     66 //===----------------------------------------------------------------------===//
     67 // VOP3 Instructions
     68 //===----------------------------------------------------------------------===//
     69 
        // Three VOP3 records that all use the VOP_I32_I32_I32 profile. None passes
        // a pattern operand to VOP3Inst, so no selection node is attached here.
        // Records are not listed in opcode order: 0x173, 0x172, 0x175.
     70 defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
     71   VOP_I32_I32_I32
     72 >;
     73 defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
     74   VOP_I32_I32_I32
     75 >;
     76 defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
     77   VOP_I32_I32_I32
     78 >;
     79 
     80 let isCommutable = 1 in {
        // Both records below are marked commutable by the enclosing let and use
        // the VOP_I64_I32_I32_I64 profile; neither passes a pattern operand.
     81 defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
     82   VOP_I64_I32_I32_I64
     83 >;
     84 
     85 // XXX - Does this set VCC? (open question; nothing in this record models it)
     86 defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
     87   VOP_I64_I32_I32_I64
     88 >;
     89 } // End isCommutable = 1
     90 
     91 
     92 //===----------------------------------------------------------------------===//
     93 // DS Instructions
     94 //===----------------------------------------------------------------------===//
        // Single DS record built from DS_1A1D_RET with opcode 0x34 and register
        // class VGPR_32. The trailing "ds_wrap_f32" string is presumably the name
        // of the non-returning counterpart - confirm against DS_1A1D_RET.
        // NOTE(review): the "Remaining instructions" list at the top of this file
        // names DS_WRAP_RTN_B32 while this record is DS_WRAP_RTN_F32 - confirm
        // whether these are the same instruction.
     95 defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
     96 
        // Placeholders: no record for either name below appears in this file.
     97 // DS_CONDXCHG32_RTN_B64
     98 // DS_CONDXCHG32_RTN_B128
     99 
    100 //===----------------------------------------------------------------------===//
    101 // SMRD Instructions
    102 //===----------------------------------------------------------------------===//
    103 
        // Defines s_dcache_inv_vol through SMRD_Inval and hands it the
        // int_amdgcn_s_dcache_inv_vol intrinsic record.
        // smrd<> receives two opcode values (0x1d, 0x22); presumably one per
        // encoding generation - confirm against the smrd class.
    104 defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
    105   "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
    106 
    107 //===----------------------------------------------------------------------===//
    108 // MUBUF Instructions
    109 //===----------------------------------------------------------------------===//
    110 
        // Defines buffer_wbinvl1_vol through MUBUF_Invalidate and hands it the
        // int_amdgcn_buffer_wbinvl1_vol intrinsic record.
        // mubuf<> receives two opcode values (0x70, 0x3f); presumably one per
        // encoding generation - confirm against the mubuf class.
    111 defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
    112   "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
    113 >;
    114 
    115 //===----------------------------------------------------------------------===//
    116 // Flat Instructions
    117 //===----------------------------------------------------------------------===//
    118 
        // Flat loads. Each record passes three arguments to FLAT_Load_Helper; they
        // appear to be (opcode, mnemonic, data register class) - confirm against
        // the helper. Sub-dword and dword loads use VGPR_32; the x2/x3/x4 forms use
        // VReg_64 / VReg_96 / VReg_128.
        // Records follow opcode order, so dwordx4 (0xe) precedes dwordx3 (0xf).
    119 def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>;
    120 def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>;
    121 def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>;
    122 def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>;
    123 def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>;
    124 def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>;
    125 def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>;
    126 def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>;
        // Flat stores: same argument layout via FLAT_Store_Helper. The opcodes used
        // are 0x18, 0x1a and 0x1c-0x1f; 0x19 and 0x1b have no record here.
    127 def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>;
    128 def FLAT_STORE_SHORT : FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>;
    129 def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>;
    130 def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
    131   0x1d, "flat_store_dwordx2", VReg_64
    132 >;
    133 def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
    134   0x1e, "flat_store_dwordx4", VReg_128
    135 >;
    136 def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
    137   0x1f, "flat_store_dwordx3", VReg_96
    138 >;
    139 defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>;
        // 32-bit flat atomics occupy opcodes 0x30-0x40. The cmpswap and fcmpswap
        // records pass two register classes (VGPR_32, VReg_64); every other record
        // passes one. Presumably the second class is a wider data operand holding
        // both the compare and the swap value - confirm against FLAT_ATOMIC.
    140 defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
    141   0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64
    142 >;
    143 defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>;
    144 defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>;
    145 defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>;
    146 defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>;
    147 defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>;
    148 defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>;
    149 defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>;
    150 defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>;
    151 defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>;
    152 defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>;
    153 defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>;
    154 defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>;
    155 defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
    156   0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64
    157 >;
    158 defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>;
    159 defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>;
        // _x2 variants: opcodes 0x50-0x60 mirror 0x30-0x40 one-for-one (offset
        // 0x20), using VReg_64 in place of VGPR_32 and VReg_128 in place of VReg_64
        // for the cmpswap forms.
    160 defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>;
    161 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
    162   0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
    163 >;
    164 defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>;
    165 defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>;
    166 defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>;
    167 defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>;
    168 defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>;
    169 defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>;
    170 defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>;
    171 defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>;
    172 defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>;
    173 defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>;
    174 defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>;
    175 defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>;
    176 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
    177   0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
    178 >;
    179 defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>;
    180 defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>;
    181 
    182 } // End SubtargetPredicate = isCIVI
    183 
    184 //===----------------------------------------------------------------------===//
    185 // Flat Patterns
    186 //===----------------------------------------------------------------------===//
    187 
    188 let Predicates = [HasFlatAddressSpace] in {
    189 
        // Selection pattern for a flat load: matches (vt (flat_ld i64:$ptr)) and
        // emits Instr_ADDR64 with $ptr followed by three literal 0 operands.
        // The zeros are presumably the instruction's flag operands - confirm
        // against the FLAT instruction class.
    190 class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
    191                              PatFrag flat_ld> :
    192   Pat <(vt (flat_ld i64:$ptr)),
    193        (Instr_ADDR64 $ptr, 0, 0, 0)
    194 >;
    195 
        // Extending byte/short loads produce i32; plain flat_load covers i32, i64,
        // v2i32 and v4i32.
    196 def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
    197 def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
    198 def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
    199 def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
    200 def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
    201 def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
        // NOTE(review): the next pattern selects FLAT_LOAD_DWORDX2 for an
        // az_extloadi32_flat producing i64. If that fragment means "load 32 bits
        // and extend", a dwordx2 load would read 64 bits from memory instead -
        // confirm this is intended (or that the pattern is unreachable).
    202 def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
    203 def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
    204 def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
    205 
        // Selection pattern for a flat store: matches (st vt:$value, i64:$ptr) and
        // emits Instr with $value, $ptr and three literal 0 operands (presumably
        // the same flag operands as on the load side - confirm).
    206 class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
    207   Pat <(st vt:$value, i64:$ptr),
    208         (Instr $value, $ptr, 0, 0, 0)
    209   >;
    210 
        // Truncating byte/short stores take an i32 value; plain flat_store covers
        // i32, i64, v2i32 and v4i32.
    211 def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
    212 def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
    213 def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
    214 def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
    215 def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
    216 def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
    217 
    218 } // End HasFlatAddressSpace predicate
    219 
    220 let Predicates = [isCI] in {
        // NOTE(review): isCI is not defined in this file as seen here; it is
        // presumably declared alongside the other subtarget predicates - confirm.
    221 
    222 // Convert (x - floor(x)) to fract(x) for f32.
        // Both fsub operands bind the same $x and $mods through VOP3Mods, and the
        // result is V_FRACT_F32_e64 with DSTCLAMP.NONE and DSTOMOD.NONE.
    223 def : Pat <
    224   (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
    225              (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
    226   (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
    227 >;
    228 
    229 // Convert (x + (-floor(x))) to fract(x) for f64.
        // Unlike the f32 pattern, this one matches the fadd/fneg spelling of the
        // subtraction; the result is V_FRACT_F64_e64 with the same clamp/omod
        // settings.
    230 def : Pat <
    231   (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
    232              (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
    233   (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
    234 >;
    235 
    236 } // End Predicates = [isCI]
    237