//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

// 32-bit VALU immediate operand that uses the constant bus.
def u32kimm : Operand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM32";
  let PrintMethod = "printU32ImmOperand";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;


def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
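
// Example usage (a sketch, not a def in this file): a target feeds one of
// these leaves into a selection pattern as the condition operand, e.g.
//
//   def : Pat <
//     (selectcc f32:$lhs, f32:$rhs, (i32 -1), (i32 0), COND_OEQ),
//     (SETE $lhs, $rhs)
//   >;
//
// where SETE stands in for a hypothetical target compare instruction.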

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason the R600 version prefers to use unordered for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;


//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;
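
// HasOneUseBinOp restricts a binary operator so the fragment only matches
// when the operation's result has a single use; the smax_oneuse/smin_oneuse/
// umax_oneuse/umin_oneuse defs further down exist to feed patterns such as
// IntMed3Pat.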

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Global address space loads
class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def global_load : GlobalLoad <load>;

// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def global_store : GlobalStore <store>;
def global_store_atomic : GlobalStore<atomic_store>;


class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

// Constant address space loads
class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def constant_load : ConstantLoad<load>;

class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

// Local address space loads
class LocalLoad <SDPatternOperator op> : LocalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class LocalStore <SDPatternOperator op> : LocalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class FlatLoad <SDPatternOperator op> : FlatMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class AZExtLoadBase <SDPatternOperator ld_node> : PatFrag<(ops node:$ptr),
                                                  (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;
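
// "az" = any-extend or zero-extend: the fragment accepts both ISD::EXTLOAD
// and ISD::ZEXTLOAD, so one pattern can cover hardware loads that zero-fill
// the unused high bits either way.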

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi8_global : GlobalLoad <sextloadi8>;

def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;

def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def sextloadi16_global : GlobalLoad <sextloadi16>;

def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;

def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : GlobalLoad <az_extloadi32>;

def az_extloadi32_flat : FlatLoad <az_extloadi32>;

def az_extloadi32_constant : ConstantLoad <az_extloadi32>;

def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;

def local_store : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;

def local_load : LocalLoad <load>;

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;
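
// These aligned variants let patterns for 64-bit local (DS) accesses require
// 8-byte alignment; a minimal usage sketch, assuming a DS_READ_B64-style
// instruction (the real defs take extra offset/gds operands):
//
//   def : Pat <
//     (v2i32 (local_load_aligned8bytes i32:$ptr)),
//     (DS_READ_B64 $ptr)
//   >;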

class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;


def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                           (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {

  def _32_local : PatFrag <
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i32 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;

  def _64_local : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i64 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;
}

defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
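
// The defm concatenates its name with each def's suffix, so this expands to
// the fragments atomic_cmp_swap_32_local and atomic_cmp_swap_64_local.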

def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
                         (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
>;

class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;

def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_cmp_swap_global_nortn : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_cmp_swap_global node:$ptr, node:$value),
  [{ return SDValue(N, 0).use_empty(); }]
>;

def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;

def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
  int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
  int FP32_NEG_ONE = 0xbf800000;
  int FP32_ONE = 0x3f800000;
  int FP64_ONE = 0x3ff0000000000000;
}
def CONST : Constants;
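
// Example usage (a sketch): the bit patterns can be referenced directly in
// output patterns, e.g. scaling by 1/(2*pi) for trig range reduction with
// something like
//
//   (V_MUL_F32_e32 (i32 CONST.TWO_PI_INV), $src)
//
// where V_MUL_F32_e32 stands in for a target multiply instruction.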

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1 in {

class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // End usesCustomInserter = 1

multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                              ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
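
// This composes the standard identity pow(x, y) = exp(y * log(x)) out of the
// target's IEEE-accurate log, exp, and multiply instructions; the identity
// holds for any base as long as the log and exp units match.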

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : Pat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : Pat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  def : Pat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
  >;

  def : Pat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 0x7fffffff),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;
}
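
// Why the Ch form above maps onto BFI_INT: case-splitting z ^ (x & (y ^ z))
// on the mask x gives z ^ (y ^ z) = y where x is set and z where x is clear,
// i.e. (x & y) | (~x & z), which is exactly the ISA definition with x as the
// select mask.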

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;
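
// Why this rewrite is valid: Ma(x, y, z) is the bitwise majority function.
// Wherever x == y the result is y, and wherever x != y (so x ^ y is set) it
// is z; that is precisely BFI_INT selecting z over y with mask (x ^ y).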

// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;

class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
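
// Worked example: masking a right-shifted value with 0xff (a zero-based mask
// with a population count of 8) selects to (BFE $src, $rshift, (MOV 8)), an
// 8-bit field extract starting at bit $rshift.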

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
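
// BIT_ALIGN (alignbit) shifts the 64-bit concatenation of its first two
// operands right by the third and returns the low 32 bits, so a rotate right
// is alignbit with the same value in both halves: ((x:x) >> s)[31:0].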

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse> : Pat<
  (max (min_oneuse i32:$src0, i32:$src1),
       (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
  (med3Inst $src0, $src1, $src2)
>;
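
// max(min(x, y), min(max(x, y), z)) computes the median of three values; for
// (x, y, z) = (3, 1, 2): max(min(3, 1), min(max(3, 1), 2)) = max(1, 2) = 2.
// The one-use operands keep the fold from firing when the inner min/max
// results are also needed elsewhere, which would duplicate work.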

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
} // End Properties = [SDNPCommutative, SDNPAssociative]


// 24-bit arithmetic patterns
def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
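
// These match the fused "floor then convert" forms: cvt_rpi corresponds to
// floor(x + 0.5) followed by conversion and cvt_flr to floor(x) followed by
// conversion (V_CVT_RPI_I32_F32 / V_CVT_FLR_I32_F32 on SI). The NoNaNsFPMath
// guard is presumably needed because the fused hardware conversion and the
// separate ffloor + fp_to_sint nodes can disagree on NaN inputs.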

class IMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class UMad24Pat<Instruction Inst> : Pat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;
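
// RcpPat folds 1.0 / x into a single reciprocal instruction, and RsqPat folds
// rcp(sqrt(x)), i.e. x^(-1/2), into a single reciprocal-square-root.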

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"
    667