Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
        ; Intrinsic under test for i32 operands, declared once per pointer
        ; address space used by the functions in this file (1, 3 and 4).
      4 declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
      5 declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
      6 declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
      7 
        ; i64 overloads of the same intrinsic, again one per address space
        ; (1, 3 and 4).
      8 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
      9 declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
     10 declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
     11 
        ; Work-item id query; the *_addr64 and shl_base tests below use its
        ; result as a per-lane index into their pointer arguments or arrays.
     12 declare i32 @llvm.amdgcn.workitem.id.x() #1
     13 
     14 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
     15 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     16 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
     17 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     18   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
     19   store i32 %result, i32 addrspace(1)* %out
     20   ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
     24 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     25 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
     26 define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     27   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     28   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
     29   store i32 %result, i32 addrspace(1)* %out
     30   ret void
     31 }
     32 
     33 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
     34 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
     35 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
     36 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     37 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
     38 define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
     39   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
     44 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     45 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
     46 define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
     47   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     48   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
     49   ret void
     50 }
     51 
     52 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
     53 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     54 ; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
     55 define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     56   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
     57   store i32 %result, i32 addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
     62 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     63 ; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
     64 define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     65   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
     66   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     67   store i32 %result, i32 addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32:
     72 ; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
     73 define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
     74   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
     75   ret void
     76 }
     77 
     78 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
     79 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     80 ; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
     81 define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
     82   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
     83   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     84   ret void
     85 }
     86 
     87 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
     88 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     89 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
     90 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
     91 define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     92   %id = call i32 @llvm.amdgcn.workitem.id.x()
     93   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
     94   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
     95   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
     96   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     97   store i32 %result, i32 addrspace(1)* %out.gep
     98   ret void
     99 }
    100 
    101 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
    102 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    103 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
    104 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    105 define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
    106   %id = call i32 @llvm.amdgcn.workitem.id.x()
    107   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    108   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    109   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
    110   ret void
    111 }
    112 
    113 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
    114 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    115 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    116 define void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    117   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
    118   store i32 %result, i32 addrspace(4)* %out
    119   ret void
    120 }
    121 
    122 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
    123 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    124 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    125 define void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    126   %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
    127   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    128   store i32 %result, i32 addrspace(4)* %out
    129   ret void
    130 }
    131 
    132 ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i32:
    133 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    134 define void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind {
    135   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
    136   ret void
    137 }
    138 
    139 ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
    140 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    141 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    142 define void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
    143   %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
    144   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    145   ret void
    146 }
    147 
    148 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
    149 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    150 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    151 define void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    152   %id = call i32 @llvm.amdgcn.workitem.id.x()
    153   %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
    154   %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
    155   %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
    156   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    157   store i32 %result, i32 addrspace(4)* %out.gep
    158   ret void
    159 }
    160 
    161 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
    162 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    163 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    164 define void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
    165   %id = call i32 @llvm.amdgcn.workitem.id.x()
    166   %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
    167   %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
    168   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    169   ret void
    170 }
    171 
    172 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
    173 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    174 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    175 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    176 define void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    177   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
    178   store i64 %result, i64 addrspace(4)* %out
    179   ret void
    180 }
    181 
    182 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
    183 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    184 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    185 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    186 define void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    187   %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
    188   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    189   store i64 %result, i64 addrspace(4)* %out
    190   ret void
    191 }
    192 
    193 ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i64:
    194 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    195 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    196 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    197 define void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind {
    198   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
    199   ret void
    200 }
    201 
    202 ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
    203 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    204 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    205 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    206 define void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
    207   %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
    208   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    209   ret void
    210 }
    211 
    212 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
    213 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    214 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    215 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    216 define void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    217   %id = call i32 @llvm.amdgcn.workitem.id.x()
    218   %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
    219   %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
    220   %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
    221   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    222   store i64 %result, i64 addrspace(4)* %out.gep
    223   ret void
    224 }
    225 
    226 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
    227 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    228 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    229 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    230 define void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
    231   %id = call i32 @llvm.amdgcn.workitem.id.x()
    232   %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
    233   %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
    234   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    235   ret void
    236 }
    237 
        ; addrspace(3) array of 512 i32 with an undef initializer; indexed by
        ; atomic_dec_shl_base_lds_0 below.
    238 @lds0 = addrspace(3) global [512 x i32] undef
    239 
    240 ; SI-LABEL: {{^}}atomic_dec_shl_base_lds_0:
    241 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
    242 ; SI: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]] offset:8
    243 define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    244   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    245   %idx.0 = add nsw i32 %tid.x, 2
    246   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
    247   %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
    248   store i32 %idx.0, i32 addrspace(1)* %add_use
    249   store i32 %val0, i32 addrspace(1)* %out
    250   ret void
    251 }
    252 
    253 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
    254 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    255 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    256 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    257 define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    258   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
    259   store i64 %result, i64 addrspace(1)* %out
    260   ret void
    261 }
    262 
    263 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
    264 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    265 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    266 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
    267 define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    268   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    269   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
    270   store i64 %result, i64 addrspace(1)* %out
    271   ret void
    272 }
    273 
    274 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
    275 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    276 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    277 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    278 define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
    279   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
    280   ret void
    281 }
    282 
    283 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
    284 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    285 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    286 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
    287 define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
    288   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    289   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
    290   ret void
    291 }
    292 
    293 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
    294 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    295 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    296 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
    297 define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    298   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
    299   store i64 %result, i64 addrspace(1)* %out
    300   ret void
    301 }
    302 
    303 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
    304 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    305 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    306 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
    307 define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    308   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    309   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    310   store i64 %result, i64 addrspace(1)* %out
    311   ret void
    312 }
    313 
    314 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64:
    315 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    316 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    317 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    318 define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
    319   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
    320   ret void
    321 }
    322 
    323 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
    324 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    325 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    326 ; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
    327 define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
    328   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    329   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    330   ret void
    331 }
    332 
    333 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
    334 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    335 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    336 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
    337 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    338 define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    339   %id = call i32 @llvm.amdgcn.workitem.id.x()
    340   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    341   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
    342   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    343   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    344   store i64 %result, i64 addrspace(1)* %out.gep
    345   ret void
    346 }
    347 
    348 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
    349 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    350 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    351 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
    352 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    353 define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
    354   %id = call i32 @llvm.amdgcn.workitem.id.x()
    355   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    356   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    357   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    358   ret void
    359 }
    360 
        ; addrspace(3) array of 512 i64 (8-byte aligned) with an undef
        ; initializer; indexed by atomic_dec_shl_base_lds_0_i64 below.
    361 @lds1 = addrspace(3) global [512 x i64] undef, align 8
    362 
    363 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
    364 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
    365 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
    366 define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    367   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    368   %idx.0 = add nsw i32 %tid.x, 2
    369   %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
    370   %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
    371   store i32 %idx.0, i32 addrspace(1)* %add_use
    372   store i64 %val0, i64 addrspace(1)* %out
    373   ret void
    374 }
    375 
        ; Attribute groups referenced above: #0 on the test functions, #1 on
        ; the workitem.id.x declaration and some of its call sites, #2 on the
        ; atomic dec intrinsic declarations.
    376 attributes #0 = { nounwind }
    377 attributes #1 = { nounwind readnone }
    378 attributes #2 = { nounwind argmemonly }
    379 
    380 
    381 
    382 
    383 
    384 
    385 
    386 
    387 
    388