//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
// Remaining instructions:
// S_CBRANCH_CDBGUSER
// S_CBRANCH_CDBGSYS
// S_CBRANCH_CDBGSYS_OR_USER
// S_CBRANCH_CDBGSYS_AND_USER
// DS_NOP
// DS_GWS_SEMA_RELEASE_ALL
// DS_WRAP_RTN_B32
// DS_CONDXCHG32_RTN_B64
// DS_WRITE_B96
// DS_WRITE_B128
// DS_CONDXCHG32_RTN_B128
// DS_READ_B96
// DS_READ_B128
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
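//
// Note: where an opcode helper below takes two values (e.g. vop1<0x45, 0x4c>
// or flat<0x8, 0x10>), the first appears to be the SI/CI encoding and the
// second the VI encoding, matching the <si/ci, vi> parameters of the shared
// opcode classes these multiclasses take.
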
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isCIVI in {

let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
  VOP_F64_F64, ftrunc
>;
defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
  VOP_F64_F64, fceil
>;
defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
  VOP_F64_F64, ffloor
>;
defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
  VOP_F64_F64, frint
>;
} // End SchedRW = [WriteDoubleAdd]

let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
  VOP_F32_F32
>;
defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
  VOP_F32_F32
>;
} // End SchedRW = [WriteQuarterRate32]

//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//

defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
  VOP_I32_I32_I32
>;
defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
  VOP_I32_I32_I32
>;
defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
  VOP_I32_I32_I32
>;

let isCommutable = 1 in {
defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
  VOP_I64_I32_I32_I64
>;

// XXX - Does this set VCC?
defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
  VOP_I64_I32_I32_I64
>;
} // End isCommutable = 1


//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;

// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128

//===----------------------------------------------------------------------===//
// SMRD Instructions
//===----------------------------------------------------------------------===//

defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
  "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;

//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//

let DisableSIDecoder = 1 in {
defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
  "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
>;
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
  flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
>;
defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
  flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
>;
defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
  flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
>;
defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
  flat<0xb, 0x13>, "flat_load_sshort", VGPR_32
>;
defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
  flat<0xc, 0x14>, "flat_load_dword", VGPR_32
>;
defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
  flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
>;
defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
  flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
>;
defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
  flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
>;
defm FLAT_STORE_BYTE : FLAT_Store_Helper <
  flat<0x18>, "flat_store_byte", VGPR_32
>;
defm FLAT_STORE_SHORT : FLAT_Store_Helper <
  flat<0x1a>, "flat_store_short", VGPR_32
>;
defm FLAT_STORE_DWORD : FLAT_Store_Helper <
  flat<0x1c>, "flat_store_dword", VGPR_32
>;
defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
  flat<0x1d>, "flat_store_dwordx2", VReg_64
>;
defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
  flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
>;
defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
  flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
>;
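
// FLAT atomics. The FLAT_ATOMIC multiclass is expected to produce both a
// no-return form and a glc = 1 "_RTN" form; the Flat Patterns section at the
// end of this file selects the _RTN variants.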
defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
  flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
  flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
  atomic_cmp_swap_flat, v2i32, VReg_64
>;
defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
  flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
>;
defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
  flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat
>;
defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
  flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat
>;
defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
  flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat
>;
defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
  flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat
>;
defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
  flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat
>;
defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
  flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat
>;
defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
  flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat
>;
defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
  flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat
>;
defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
  flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat
>;
defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
  flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat
>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
  flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat
>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
  flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
  atomic_cmp_swap_flat, v2i64, VReg_128
>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
  flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat
>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
  flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat
>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
  flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat
>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
  flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat
>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
  flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat
>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
  flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat
>;
defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
  flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat
>;
defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
  flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat
>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
  flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat
>;
defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
  flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat
>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
  flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat
>;

} // End SubtargetPredicate = isCIVI

// CI Only flat instructions

let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
  flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
  null_frag, v2f32, VReg_64
>;
defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
  flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32
>;
defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
  flat<0x40>, "flat_atomic_fmax", VGPR_32, f32
>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
  flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
  null_frag, v2f64, VReg_128
>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
  flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64
>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
  flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64
>;

} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isCIVI] in {
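
// The trailing immediate operands in the patterns below appear to correspond
// to the glc/slc/tfe modifier operands declared by the FLAT helpers; the
// atomic load and store patterns set glc = 1, presumably to request a
// coherent access.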

// Patterns for global loads with no offset.
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
  (vt (node i64:$addr)),
  (inst $addr, 0, 0, 0)
>;

class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
  (vt (node i64:$addr)),
  (inst $addr, 1, 0, 0)
>;

def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;


class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
  (node vt:$data, i64:$addr),
  (inst $addr, $data, 0, 0, 0)
>;

class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
  // The atomic store follows the atomic binop convention, so the address
  // comes first.
  (node i64:$addr, vt:$data),
  (inst $addr, $data, 1, 0, 0)
>;

def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;

class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : Pat <
  (vt (node i64:$addr, data_vt:$data)),
  (inst $addr, $data, 0, 0)
>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
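
// 64-bit (_X2) counterparts of the atomic patterns above.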
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;

} // End Predicates = [isCIVI]