//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
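// ADDRIndirect defers address matching to C++: the SelectADDRIndirect hook in
// the target's DAG ISel must produce the two result operands declared above
// (presumably a base and an offset; the exact decomposition lives in the C++
// implementation, not in this file).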
// 32-bit VALU immediate operand that uses the constant bus.
def u32kimm : Operand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM32";
  let PrintMethod = "printU32ImmOperand";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;


def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
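// Note that each COND_O* leaf above accepts both the ordered form (SETOxx)
// and the "don't care about NaNs" form (SETxx): an ordered hardware compare
// is a valid implementation of either. The COND_U* leaves, by contrast,
// match only the explicitly unordered conditions.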
// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;


//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Global address space loads
class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def global_load : GlobalLoad <load>;

// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def global_store : GlobalStore <store>;
def global_store_atomic : GlobalStore<atomic_store>;


class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

// Constant address space loads
class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def constant_load : ConstantLoad<load>;

class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
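// Each *MemOp class above (and LocalMemOp here) wraps a generic memory
// fragment with an address-space check on the underlying MemSDNode; a
// concrete fragment is then a one-line wrapper, as in
//   def local_load : LocalLoad <load>;
// (defined below), which matches any 'load' in the LOCAL_ADDRESS space.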
// Local address space loads
class LocalLoad <SDPatternOperator op> : LocalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class LocalStore <SDPatternOperator op> : LocalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class FlatLoad <SDPatternOperator op> : FlatMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

// A load that is either zero-extending or any-extending.
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                         (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi8_global : GlobalLoad <sextloadi8>;

def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;

def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def sextloadi16_global : GlobalLoad <sextloadi16>;

def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;

def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : GlobalLoad <az_extloadi32>;

def az_extloadi32_flat : FlatLoad <az_extloadi32>;

def az_extloadi32_constant : ConstantLoad <az_extloadi32>;

def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;

def local_store : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;

def local_load : LocalLoad <load>;

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;

class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

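// The defs below instantiate local_binary_atomic_op for each ISD atomic RMW
// node, restricting it to LOCAL_ADDRESS so the patterns select to the local
// (LDS) atomic instructions rather than their global/flat counterparts
// defined further down.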
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {

  def _32_local : PatFrag <
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i32 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;

  def _64_local : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i64 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;
}

defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;

def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
                         (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
>;

class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;

def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_cmp_swap_global_nortn : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_cmp_swap_global node:$ptr, node:$value),
  [{ return SDValue(N, 0).use_empty(); }]
>;
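// The _nortn variant fires only when the value returned by the cmpxchg is
// never read (SDValue(N, 0).use_empty()), allowing selection of a no-return
// form of the instruction that skips the result write-back.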
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
  int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
  int FP32_NEG_ONE = 0xbf800000;
  int FP32_ONE = 0x3f800000;
  int FP64_ONE = 0x3ff0000000000000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                              ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;
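// A usage sketch: extracting the low f32 lane of a v2f32 value turns
// extractelt into a plain subregister copy, e.g.
//   def : Extract_Element <f32, v2f32, 0, sub0>;
// The per-target instruction files included at the bottom instantiate these
// classes for each lane and vector type.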
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  //   (y & x) | (z & ~x)
  def : Pat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  //   z ^ (x & (y ^ z))
  def : Pat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  def : Pat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 0x7fffffff),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;
}

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;

// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;

class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse> : Pat<
  (max (min_oneuse i32:$src0, i32:$src1),
       (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
  (med3Inst $src0, $src1, $src2)
>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
} // Properties = [SDNPCommutative, SDNPAssociative]


// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;
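// IMad24Pat above and UMad24Pat below fold a 24-bit multiply whose result
// feeds an add into a single multiply-add instruction; on this hardware the
// 24-bit integer multiply forms are cheaper than a full 32-bit multiply.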
class UMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"