; Codegen tests for the llvm.amdgcn.atomic.dec intrinsics on LDS, global and
; flat pointers, run for bonaire (CI), tonga (VI) and gfx900 (GFX9).
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

      5 declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
      6 declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
      7 declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
      8 
      9 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
     10 declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
     11 declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
     12 
     13 declare i32 @llvm.amdgcn.workitem.id.x() #1
     14 
     15 ; Make sure no crash on invalid non-constant
     16 ; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
     17 ; CIVI-DAG: s_mov_b32 m0
     18 ; GFX9-NOT: m0
     19 define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
     20   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
     21   store i32 %result, i32 addrspace(1)* %out
     22   ret void
     23 }
     24 
     25 ; Make sure no crash on invalid non-constant
     26 ; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
     27 ; CIVI-DAG: s_mov_b32 m0
     28 ; GFX9-NOT: m0
     29 define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
     30   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
     31   store i32 %result, i32 addrspace(1)* %out
     32   ret void
     33 }
     34 
     35 ; Make sure no crash on invalid non-constant
     36 ; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
     37 define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
     38   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
     39   store i32 %result, i32 addrspace(1)* %out
     40   ret void
     41 }
     42 
     43 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
     44 ; CIVI-DAG: s_mov_b32 m0
     45 ; GFX9-NOT: m0
     46 
     47 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     48 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
     49 define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     50   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
     51   store i32 %result, i32 addrspace(1)* %out
     52   ret void
     53 }
     54 
     55 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
     56 ; CIVI-DAG: s_mov_b32 m0
     57 ; GFX9-NOT: m0
     58 
     59 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     60 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
     61 define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     62   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     63   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
     64   store i32 %result, i32 addrspace(1)* %out
     65   ret void
     66 }
     67 
     68 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
     69 ; CIVI-DAG: s_mov_b32 m0
     70 ; GFX9-NOT: m0
     71 
     72 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
     73 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
     74 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     75 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
     76 define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
     77   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
     78   ret void
     79 }
     80 
     81 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
     82 ; CIVI-DAG: s_mov_b32 m0
     83 ; GFX9-NOT: m0
     84 
     85 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     86 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
     87 define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
     88   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     89   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
     90   ret void
     91 }
     92 
     93 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
     94 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     95 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
     96 ; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
     97 define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     98   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
     99   store i32 %result, i32 addrspace(1)* %out
    100   ret void
    101 }
    102 
    103 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
    104 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    105 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
    106 ; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
    107 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
    108   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
    109   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    110   store i32 %result, i32 addrspace(1)* %out
    111   ret void
    112 }
    113 
    114 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
    115 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    116 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    117 ; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
    118 define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
    119   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
    120   ret void
    121 }
    122 
    123 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
    124 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    125 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
    126 ; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
    127 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
    128   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
    129   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    130   ret void
    131 }
    132 
    133 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
    134 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    135 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
    136 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    137 define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
    138   %id = call i32 @llvm.amdgcn.workitem.id.x()
    139   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    140   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
    141   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    142   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    143   store i32 %result, i32 addrspace(1)* %out.gep
    144   ret void
    145 }
    146 
    147 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
    148 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    149 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
    150 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    151 define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
    152   %id = call i32 @llvm.amdgcn.workitem.id.x()
    153   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    154   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    155   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    156   ret void
    157 }
    158 
    159 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
    160 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    161 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    162 define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
    163   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
    164   store i32 %result, i32* %out
    165   ret void
    166 }
    167 
    168 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
    169 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    170 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    171 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
    172 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
    173   %gep = getelementptr i32, i32* %ptr, i32 4
    174   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    175   store i32 %result, i32* %out
    176   ret void
    177 }
    178 
    179 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
    180 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    181 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    182 define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
    183   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
    184   ret void
    185 }
    186 
    187 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
    188 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    189 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    190 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
    191 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
    192   %gep = getelementptr i32, i32* %ptr, i32 4
    193   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    194   ret void
    195 }
    196 
    197 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
    198 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    199 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    200 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
    201 define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
    202   %id = call i32 @llvm.amdgcn.workitem.id.x()
    203   %gep.tid = getelementptr i32, i32* %ptr, i32 %id
    204   %out.gep = getelementptr i32, i32* %out, i32 %id
    205   %gep = getelementptr i32, i32* %gep.tid, i32 5
    206   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    207   store i32 %result, i32* %out.gep
    208   ret void
    209 }
    210 
    211 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
    212 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    213 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    214 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
    215 define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
    216   %id = call i32 @llvm.amdgcn.workitem.id.x()
    217   %gep.tid = getelementptr i32, i32* %ptr, i32 %id
    218   %gep = getelementptr i32, i32* %gep.tid, i32 5
    219   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    220   ret void
    221 }
    222 
    223 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
    224 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    225 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    226 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    227 define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
    228   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
    229   store i64 %result, i64* %out
    230   ret void
    231 }
    232 
    233 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
    234 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    235 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    236 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    237 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
    238 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
    239   %gep = getelementptr i64, i64* %ptr, i32 4
    240   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    241   store i64 %result, i64* %out
    242   ret void
    243 }
    244 
    245 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
    246 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    247 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    248 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    249 define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
    250   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
    251   ret void
    252 }
    253 
    254 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
    255 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    256 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    257 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    258 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
    259 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
    260   %gep = getelementptr i64, i64* %ptr, i32 4
    261   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    262   ret void
    263 }
    264 
    265 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
    266 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    267 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    268 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    269 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
    270 define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
    271   %id = call i32 @llvm.amdgcn.workitem.id.x()
    272   %gep.tid = getelementptr i64, i64* %ptr, i32 %id
    273   %out.gep = getelementptr i64, i64* %out, i32 %id
    274   %gep = getelementptr i64, i64* %gep.tid, i32 5
    275   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    276   store i64 %result, i64* %out.gep
    277   ret void
    278 }
    279 
    280 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
    281 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    282 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    283 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    284 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
    285 define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
    286   %id = call i32 @llvm.amdgcn.workitem.id.x()
    287   %gep.tid = getelementptr i64, i64* %ptr, i32 %id
    288   %gep = getelementptr i64, i64* %gep.tid, i32 5
    289   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    290   ret void
    291 }
    292 
    293 @lds0 = addrspace(3) global [512 x i32] undef
    294 
    295 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
    296 ; CIVI-DAG: s_mov_b32 m0
    297 ; GFX9-NOT: m0
    298 
    299 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
    300 ; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
    301 define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    302   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    303   %idx.0 = add nsw i32 %tid.x, 2
    304   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
    305   %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
    306   store i32 %idx.0, i32 addrspace(1)* %add_use
    307   store i32 %val0, i32 addrspace(1)* %out
    308   ret void
    309 }
    310 
    311 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
    312 ; CIVI-DAG: s_mov_b32 m0
    313 ; GFX9-NOT: m0
    314 
    315 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    316 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    317 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    318 define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    319   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
    320   store i64 %result, i64 addrspace(1)* %out
    321   ret void
    322 }
    323 
    324 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
    325 ; CIVI-DAG: s_mov_b32 m0
    326 ; GFX9-NOT: m0
    327 
    328 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    329 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    330 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
    331 define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    332   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    333   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
    334   store i64 %result, i64 addrspace(1)* %out
    335   ret void
    336 }
    337 
    338 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
    339 ; CIVI-DAG: s_mov_b32 m0
    340 ; GFX9-NOT: m0
    341 
    342 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    343 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    344 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    345 define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
    346   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
    347   ret void
    348 }
    349 
    350 ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
    351 ; CIVI-DAG: s_mov_b32 m0
    352 ; GFX9-NOT: m0
    353 
    354 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    355 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    356 ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
    357 define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
    358   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    359   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
    360   ret void
    361 }
    362 
    363 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
    364 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    365 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    366 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
    367 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
    368 define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    369   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
    370   store i64 %result, i64 addrspace(1)* %out
    371   ret void
    372 }
    373 
    374 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
    375 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    376 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    377 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
    378 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
    379 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    380   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    381   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    382   store i64 %result, i64 addrspace(1)* %out
    383   ret void
    384 }
    385 
    386 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
    387 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    388 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    389 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    390 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
    391 define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
    392   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
    393   ret void
    394 }
    395 
    396 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
    397 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    398 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    399 ; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
    400 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
    401 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
    402   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    403   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    404   ret void
    405 }
    406 
    407 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
    408 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    409 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
    410 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    411 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
    412 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    413 define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    414   %id = call i32 @llvm.amdgcn.workitem.id.x()
    415   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    416   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
    417   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    418   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    419   store i64 %result, i64 addrspace(1)* %out.gep
    420   ret void
    421 }
    422 
    423 ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
    424 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    425 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
    426 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    427 ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
    428 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    429 define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
    430   %id = call i32 @llvm.amdgcn.workitem.id.x()
    431   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    432   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    433   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    434   ret void
    435 }
    436 
    437 @lds1 = addrspace(3) global [512 x i64] undef, align 8
    438 
    439 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
    440 ; CIVI-DAG: s_mov_b32 m0
    441 ; GFX9-NOT: m0
    442 
    443 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
    444 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
    445 define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    446   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    447   %idx.0 = add nsw i32 %tid.x, 2
    448   %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
    449   %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
    450   store i32 %idx.0, i32 addrspace(1)* %add_use
    451   store i64 %val0, i64 addrspace(1)* %out
    452   ret void
    453 }
    454 
    455 attributes #0 = { nounwind }
    456 attributes #1 = { nounwind readnone }
    457 attributes #2 = { nounwind argmemonly }
    458