Home | History | Annotate | Download | only in AMDGPU
//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
// Address-selection ComplexPatterns (selector functions live in the AMDGPU
// instruction selector). Each matches an i64 flat address into three
// operands: (vaddr, offset, slc). The final -10 is the pattern-complexity
// adjustment passed to TableGen's pattern sorter.
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;

// Signed-offset variants, used by the global/scratch (signed 13-bit offset)
// instruction forms below.
def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
     15 
     16 //===----------------------------------------------------------------------===//
     17 // FLAT classes
     18 //===----------------------------------------------------------------------===//
     19 
// Base class for all FLAT pseudo instructions (flat, global and scratch
// segments). Holds the mnemonic/asm-operand strings and a set of feature
// bits that FLAT_Real consults when deciding which encoding fields exist.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  // Saved separately so FLAT_Real can rebuild "<mnemonic><operands>" for
  // the real (printed/encoded) instruction.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; at most one of these is set by the subclasses below.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;

  // Gate each segment's instructions on the matching subtarget feature.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instruction are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
}
     68 
// Real (encoded) FLAT instruction: binds a FLAT_Pseudo's operands into the
// 64-bit FLAT encoding. The pseudo's has_*/enabled_* bits select which
// fields are encoded vs. left undefined (?) or forced to a disabled value.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // encoding fields
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  // Bits 54-48 carry saddr when the pseudo has one (0x7f = disabled, the
  // same encoding value as exec_hi — see the note above FLAT_Load_Pseudo).
  // These bits were reserved pre-gfx9, hence the plain 0 otherwise.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
    123 
    124 // TODO: Is exec allowed for saddr? The disabled value 0x7f is the
    125 // same encoding value as exec_hi, so it isn't possible to use that if
    126 // saddr is 32-bit (which isn't handled here yet).
// Load pseudo for the flat and global segments. The nested !con chain
// assembles the input operand list piecewise: vaddr, then the optional
// saddr, the offset (signed 13-bit when HasSignedOffset, else unsigned
// 12-bit), the glc/slc cache modifiers, and finally an optional tied
// destination input ($vdst_in) used by the D16 partial-register loads.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !con(
      !con(
        !con((ins VReg_64:$vaddr),
          !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
            (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
            (ins GLC:$glc, SLC:$slc)),
            !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  // SADDR variants get a distinct pseudo name so both forms can coexist.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  // D16 loads write only half the register: tie $vdst_in to $vdst so the
  // untouched half is preserved, and keep the tied input out of the asm.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
    151 
// Store pseudo for the flat and global segments. Operand list is built the
// same way as FLAT_Load_Pseudo: vaddr+vdata, optional saddr, offset
// (signed s13 vs. unsigned u12), then glc/slc. No vdst — stores only
// produce side effects.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !con(
      !con((ins VReg_64:$vaddr, vdataClass:$vdata),
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
          (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
          (ins GLC:$glc, SLC:$slc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
  let mayLoad  = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  // SADDR variants get a distinct pseudo name so both forms can coexist.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}
    171 
// Global-segment load: emits the VGPR-address form ("") plus the
// scalar-base _SADDR form. Both use a signed offset (HasSignedOffset=1)
// and carry the saddr field (HasSaddr=1).
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>;
  }
}
    178 
// Global-segment store: VGPR-address form ("") plus scalar-base _SADDR
// form, both with a signed offset, mirroring FLAT_Global_Load_Pseudo.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
  }
}
    185 
// Scratch-segment load. Unlike global, the address is either a 32-bit VGPR
// ($vaddr) or a 32-bit SGPR base ($saddr) — never both — so the two operand
// lists are spelled out directly instead of via !con. SReg_32_XEXEC_HI
// excludes exec_hi, whose encoding collides with the saddr-disabled value.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  // Exactly one of vaddr/saddr is present, depending on the variant.
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}
    202 
// Scratch-segment store; addressing mirrors FLAT_Scratch_Load_Pseudo:
// either a 32-bit VGPR address or a 32-bit SGPR base, never both.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
  let mayLoad  = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  // Exactly one of vaddr/saddr is present, depending on the variant.
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}
    219 
// Emits both scratch load variants: VGPR-addressed ("") and SGPR-based
// (_SADDR).
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}
    226 
// Emits both scratch store variants: VGPR-addressed ("") and SGPR-based
// (_SADDR).
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}
    233 
// Atomic without a returned value: glc is not a user operand (has_glc=0)
// and encodes as 0 (glcValue=0); there is no vdst.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
    let mayLoad = 1;
    let mayStore = 1;
    let has_glc  = 0;
    let glcValue = 0;
    let has_vdst = 0;
    let maybeAtomic = 1;
}
    244 
// Returning atomic: same as the no-return form but with a vdst and glc
// forced to 1 in the encoding (the asm strings below spell " glc"
// explicitly). hasPostISelHook lets the target adjust the instruction
// after instruction selection.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let PseudoInstr = NAME # "_RTN";
}
    253 
// Flat-segment atomic pair: a no-return form ("") and a returning _RTN
// form. Only the _RTN form carries a selection pattern; data_vt/data_rc
// allow the data operand to differ from the result (e.g. cmpswap takes a
// 2-element vector of compare+swap values).
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
    " $vaddr, $vdata$offset$slc">,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
       AtomicNoRet <opName, 1>;
}
    277 
// Global-segment atomics: four variants — no-return/_RTN with a VGPR
// address, plus _SADDR/_SADDR_RTN with a scalar base. All use the signed
// 13-bit offset; only the plain _RTN form has a selection pattern (via
// FLATSignedAtomic).
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
    " $vaddr, $vdata, off$offset$slc">,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, off$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
      AtomicNoRet <opName, 1> {
    let has_saddr = 1;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
    " $vaddr, $vdata, $saddr$offset$slc">,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    AtomicNoRet <opName#"_saddr", 1> {
     let has_saddr = 1;
     let enabled_saddr = 1;
     let PseudoInstr = NAME#"_SADDR_RTN";
  }
}
    325 
// PatFrag wrapper that restricts a generic binary atomic SDNode to memory
// operations in the flat address space.
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;

// Flat-address-space variants of each binary atomic operation, used by the
// FLAT_Atomic_Pseudo instantiations below.
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;
    345 
    346 
    347 
    348 //===----------------------------------------------------------------------===//
    349 // Flat Instructions
    350 //===----------------------------------------------------------------------===//
    351 
// Flat-segment loads and stores (byte/short sign- and zero-extending forms,
// plus 1/2/3/4-dword widths).
def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
    367 
// D16 loads write only half of a 32-bit register; HasTiedOutput=1 ties
// $vdst_in to $vdst so the other half is preserved.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}
    379 
// Flat-segment atomics. Each defm produces a no-return and a _RTN variant
// (see FLAT_Atomic_Pseudo). The cmpswap forms take a 2-element data vector
// (compare value + swap value) wider than the result.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                VGPR_32, i32, atomic_cmp_swap_flat,
                                v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, atomic_cmp_swap_flat,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                                VGPR_32, i32, atomic_swap_flat>;

defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_flat>;

defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo <"flat_atomic_add",
                                VGPR_32, i32, atomic_add_flat>;

defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                                VGPR_32, i32, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                                VGPR_32, i32, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                                VGPR_32, i32, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                                VGPR_32, i32, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                                VGPR_32, i32, atomic_umax_flat>;

defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo <"flat_atomic_and",
                                VGPR_32, i32, atomic_and_flat>;

defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo <"flat_atomic_or",
                                VGPR_32, i32, atomic_or_flat>;

defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                                VGPR_32, i32, atomic_xor_flat>;

defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                                VGPR_32, i32, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                                VGPR_32, i32, atomic_dec_flat>;

// 64-bit (_X2) widths of the same operations.
defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                                VReg_64, i64, atomic_add_flat>;

defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                                VReg_64, i64, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                                VReg_64, i64, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                                VReg_64, i64, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                                VReg_64, i64, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                                VReg_64, i64, atomic_umax_flat>;

defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                                VReg_64, i64, atomic_and_flat>;

defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                                VReg_64, i64, atomic_or_flat>;

defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                                VReg_64, i64, atomic_xor_flat>;

defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                                VReg_64, i64, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64, atomic_dec_flat>;
    459 
// Floating-point atomics; no selection patterns (null_frag) — assembler/
// disassembler only here.
let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?

defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN        : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX        : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                                VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                VReg_64, f64>;

} // End SubtargetPredicate = isCI
    481 
// Global-segment instructions (GFX9+). Each defm expands to the VGPR-
// addressed and _SADDR variants; the atomics additionally get _RTN forms.
let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 forms (HasTiedInput=1) preserve the half of $vdst not written.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// Atomics select via the *_global PatFrags (global address space), unlike
// the flat-segment atomics above which use the *_flat fragments.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                             VGPR_32, i32, atomic_swap_global>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_global>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_add_global>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_sub_global>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_min_global>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_umin_global>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_max_global>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_umax_global>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_and_global>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_or_global>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_xor_global>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global>;

// 64-bit (_X2) widths of the same operations.
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_add_global>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_sub_global>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_min_global>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_umin_global>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_max_global>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_umax_global>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_and_global>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_or_global>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_xor_global>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts
    592 
    593 
// Scratch-segment instructions (GFX9+). Each defm expands to the VGPR-
// addressed and SGPR-based (_SADDR) variants.
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// NOTE(review): unlike the flat/global D16 loads, these scratch D16 forms
// have no tied-output mechanism in FLAT_Scratch_Load_Pseudo — confirm
// whether preserving the unwritten half is handled elsewhere.
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts
    622 
    623 //===----------------------------------------------------------------------===//
    624 // Flat Patterns
    625 //===----------------------------------------------------------------------===//
    626 
    627 // Patterns for global loads with no offset.
// Selects a flat load: the FLATOffset ComplexPattern splits the address
// into (vaddr, offset, slc); glc is hard-wired to 0 in the result.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc)
>;
    632 
// Patterns for D16_HI flat loads: a build_vector whose low element comes from
// a register ($elt0) and whose high element is loaded from memory is selected
// to a single _D16_HI load that merges into $elt0.  Two defs cover the i16
// (v2i16) and bitconverted f16 (v2f16) forms of the same combine.
multiclass FlatLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
  def : GCNPat <
    (build_vector vt:$elt0, (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))),
    (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
  >;

 def : GCNPat <
    (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))))),
    (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
  >;
}
    644 
// Same combine as FlatLoadPat_Hi16, but using FLATOffsetSigned
// (SelectFlatOffset<true>) — the signed-offset addressing used by the
// GLOBAL_* instruction forms.
multiclass FlatSignedLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
  def : GCNPat <
    (build_vector vt:$elt0, (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))),
    (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
  >;

 def : GCNPat <
    (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))))),
    (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
  >;
}
    656 
// Mirror of FlatLoadPat_Hi16 for the low half: the loaded value lands in the
// low 16 bits and the pre-existing high half ($hi, matched via Hi16Elt) is
// preserved by the _D16 (lo) load.
multiclass FlatLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
  def : GCNPat <
    (build_vector (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
    (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
  >;

 def : GCNPat <
    (build_vector (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
    (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
  >;
}
    668 
// Signed-offset (GLOBAL_*) variant of FlatLoadPat_Lo16.
multiclass FlatSignedLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
  def : GCNPat <
    (build_vector (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
    (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
  >;

 def : GCNPat <
    (build_vector (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
    (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
  >;
}
    680 
// Atomic (ordered) load.  Uses the FLATAtomic address selector
// (SelectFlatAtomic) instead of FLATOffset; output operands are otherwise
// identical to FlatLoadPat.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc)
>;
    685 
// Plain load through the signed-offset selector (GLOBAL_* addressing).
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc)
>;
    690 
// Store with unsigned flat offset; store nodes take (data, address), and the
// instruction takes (vaddr, data, offset, glc=0, slc).
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, $data, $offset, 0, $slc)
>;
    695 
// Store through the signed-offset selector (GLOBAL_* addressing).
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, $data, $offset, 0, $slc)
>;
    700 
// Atomic store, unsigned flat addressing.
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, 0, $slc)
>;
    707 
// Atomic store, signed (GLOBAL_*) addressing via FLATSignedAtomic
// (SelectFlatAtomicSigned).
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, 0, $slc)
>;
    714 
// Read-modify-write atomic that returns the old value (used with _RTN
// instruction variants below).  data_vt may differ from vt for cmpswap,
// which packs compare+swap values into a wider vector (e.g. i32/v2i32).
// Note: unlike the load/store patterns there is no glc literal here —
// presumably the _RTN pseudos hard-wire glc; confirm in FLAT_Atomic_Pseudo.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;
    720 
// Signed-address (GLOBAL_*) counterpart of FlatAtomicPat.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;
    726 
// Pattern instantiations selecting generic (flat address space) memory
// operations onto FLAT_* instructions.
let OtherPredicates = [HasFlatAddressSpace] in {

// Extending/truncating and natural-width loads up to 128 bits.
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat, i64>;

// 32-bit RMW atomics.  NOTE(review): flat patterns intentionally match the
// atomic_*_global nodes here — presumably the global nodes cover flat
// addressing at this point in the selector; confirm against AMDGPUInstrInfo.td.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;

// 64-bit RMW atomics (_X2 variants; cmpswap data is v2i64).
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;

// NOTE(review): this inner `let` *replaces* the outer OtherPredicates, so
// these D16 patterns are gated only on D16PreservesUnusedBits, not also on
// HasFlatAddressSpace — confirm that is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

// Hi16 merge loads get a modest complexity boost over the generic patterns.
let AddedComplexity = 3 in {
defm : FlatLoadPat_Hi16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_flat>;
defm : FlatLoadPat_Hi16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_flat>;
defm : FlatLoadPat_Hi16 <FLAT_LOAD_SHORT_D16_HI, load_flat>;
}

// Lo16 merge loads need a higher boost (9) — presumably to win over the
// signed/global pattern set at AddedComplexity = 10 interplay; confirm.
let AddedComplexity = 9 in {
defm : FlatLoadPat_Lo16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_flat>;
defm : FlatLoadPat_Lo16 <FLAT_LOAD_SBYTE_D16, sextloadi8_flat>;
defm : FlatLoadPat_Lo16 <FLAT_LOAD_SHORT_D16, load_flat>;
}
}

} // End OtherPredicates = [HasFlatAddressSpace]
    801 
// Pattern instantiations selecting global-address-space operations onto
// GLOBAL_* instructions (gfx9+).  AddedComplexity = 10 makes these win over
// the flat patterns above when both could match.
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {

def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;

def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;

def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_global, i32>;
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_global, i64>;

def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;

// NOTE(review): this inner `let` *replaces* the outer OtherPredicates (the
// AddedComplexity = 10 from the outer `let` still applies) — confirm that
// dropping HasFlatGlobalInsts here is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;

defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_global>;
defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_global>;
defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SHORT_D16_HI, load_global>;

defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_global>;
defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_global>;
defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SHORT_D16, load_global>;

}

def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64>;

// 32-bit global RMW atomics (returning variants).
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>;

// 64-bit global RMW atomics.
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;

} // End OtherPredicates = [HasFlatGlobalInsts]
    873 
    874 
    875 //===----------------------------------------------------------------------===//
    876 // Target
    877 //===----------------------------------------------------------------------===//
    878 
    879 //===----------------------------------------------------------------------===//
    880 // CI
    881 //===----------------------------------------------------------------------===//
    882 
// Real (encodable) CI instruction wrapping a FLAT pseudo.  Registered under
// SIEncodingFamily.SI and restricted to CI-only assembly/disassembly.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isCIOnly;
  let DecoderNamespace="CI";
}
    889 
// CI encodings for FLAT loads and stores.  Note DWORDX4 (0xe) precedes
// DWORDX3 (0xf): these are listed out of numeric order but match the CI
// opcode assignments.
def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9,  FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa,  FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb,  FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc,  FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd,  FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe,  FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf,  FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
    905 
// Emits both encodings of an atomic (no-return and _RTN) at the same opcode;
// the glc bit distinguishes them in hardware — confirm in FLAT_Real encoding.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
    910 
// CI atomic encodings.  Opcodes 0x34 and 0x54 are skipped — presumably
// reserved/unused on CI rather than an omission; confirm against the CI ISA
// opcode table.
defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
    945 
    946 
    947 //===----------------------------------------------------------------------===//
    948 // VI
    949 //===----------------------------------------------------------------------===//
    950 
// Real (encodable) VI/gfx9 instruction wrapping a FLAT pseudo.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isVI;
  let DecoderNamespace="VI";
}
    957 
// Emits both addressing forms of a global/scratch pseudo at one opcode:
// the VGPR-address form and the _SADDR (scalar-register base) form.
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
    962 
// VI encodings for FLAT loads/stores.  Note DWORDX4 (0x17) is listed before
// DWORDX3 (0x16), and likewise for the stores — out of numeric order but
// matching the VI opcode assignments.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// D16 partial-register loads (0x20-0x25).
def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
    987 
// VI counterpart of FLAT_Real_Atomics_ci: no-return and _RTN encodings at
// the same opcode.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
    992 
// Global atomics need four encodings per opcode: {vaddr, saddr} x {no-return,
// _RTN}.  Inherits the two non-returning forms from FLAT_Real_AllAddr_vi and
// adds the two returning forms here.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
  FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
    998 
    999 
// VI flat atomic encodings: 32-bit at 0x40-0x4c, 64-bit (_X2) at 0x60-0x6c.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
   1026 
// GLOBAL_* load/store encodings.  They reuse the FLAT_* opcode values
// (0x10-0x25) — presumably the instruction's segment field distinguishes
// global from flat; confirm in the FLAT_Real encoding class.
defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
   1051 
   1052 
// GLOBAL_* atomic encodings, at the same opcode values as the FLAT atomics
// above; each defm expands to vaddr/saddr x no-return/_RTN encodings.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
   1079 
// SCRATCH_* encodings, again sharing the FLAT/GLOBAL opcode values (the
// segment field in the encoding selects scratch).  Listed in mixed order
// (stores interleaved with D16 loads), but each opcode matches its FLAT
// counterpart above.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
   1102