Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
      4 
      5 declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
        ; This declaration and the five after it are the atomic-inc overloads exercised
        ; by this file. The mangled suffix names the value width (i32 or i64) and the
        ; pointer address space (p1, p3, p0 for addrspace 1, 3 and the default space).
        ; Operands are (pointer, value, i32, i32, i1); every call in this file passes
        ; 0, 0, false for the last three.
      6 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
      7 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
      8 
      9 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
     10 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
     11 declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
     12 
        ; Used by the *_addr64 and *_shl_base_* tests to build a per-work-item index.
     13 declare i32 @llvm.amdgcn.workitem.id.x() #1
     14 
     15 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
        ; Returning 32-bit atomic inc through an addrspace(3) pointer with operand 42.
        ; The checks expect 42 in a VGPR feeding ds_inc_rtn_u32. The CI/VI runs also
        ; expect a write to m0, while the gfx900 run must not mention m0 at all.
     16 ; CIVI-DAG: s_mov_b32 m0
     17 ; GFX9-NOT: m0
     18 
     19 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     20 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
     21 define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     22   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
        ; Storing the result keeps it live, so the returning (_rtn) form is expected.
     23   store i32 %result, i32 addrspace(1)* %out
     24   ret void
     25 }
     26 
     27 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
        ; Same as the returning addrspace(3) i32 case, but the pointer is advanced by
        ; 4 i32 elements first. The checks expect that constant to appear as offset:16
        ; on ds_inc_rtn_u32.
     28 ; CIVI-DAG: s_mov_b32 m0
     29 ; GFX9-NOT: m0
     30 
     31 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     32 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
     33 define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     34   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     35   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
     36   store i32 %result, i32 addrspace(1)* %out
     37   ret void
     38 }
     39 
     40 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
        ; Non-returning 32-bit atomic inc through an addrspace(3) pointer. The result
        ; of the call is unused, so the checks expect ds_inc_u32 (no _rtn suffix) fed
        ; by the pointer copied from an SGPR and by a VGPR holding the operand.
        ; The operand check is pinned to 42, the constant the IR below passes; a bare
        ; "4" would also accept any other immediate that merely starts with a 4.
     41 ; CIVI-DAG: s_mov_b32 m0
     42 ; GFX9-NOT: m0
     43 
     44 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
     45 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
     46 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     47 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
     48 define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
        ; %result is intentionally left unused.
     49   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
     50   ret void
     51 }
     52 
     53 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
        ; Non-returning addrspace(3) i32 case with the pointer advanced by 4 i32
        ; elements. The checks expect ds_inc_u32 carrying offset:16.
     54 ; CIVI-DAG: s_mov_b32 m0
     55 ; GFX9-NOT: m0
     56 
     57 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
     58 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
     59 define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
     60   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
        ; %result is intentionally left unused.
     61   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
     62   ret void
     63 }
     64 
     65 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
        ; Returning 32-bit atomic inc on an addrspace(1) pointer. The CI/VI runs expect
        ; buffer_atomic_inc with no VGPR address ("off"); the gfx900 run expects
        ; global_atomic_inc. Both expected forms end in glc.
     66 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     67 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
     68 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
     69 define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     70   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
     71   store i32 %result, i32 addrspace(1)* %out
     72   ret void
     73 }
     74 
     75 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
        ; Returning addrspace(1) i32 case with the pointer advanced by 4 i32 elements.
        ; Both expected instruction forms carry the constant as offset:16 before glc.
     76 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     77 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
     78 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
     79 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     80   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
     81   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
     82   store i32 %result, i32 addrspace(1)* %out
     83   ret void
     84 }
     85 
     86 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
        ; Non-returning addrspace(1) i32 case. The result is unused, and the expected
        ; instructions must end right after their operands, i.e. without glc.
     87 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     88 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
     89 ; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
     90 define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
     91   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
     92   ret void
     93 }
     94 
     95 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
        ; Non-returning addrspace(1) i32 case with the pointer advanced by 4 i32
        ; elements. The expected instructions end in offset:16 with no glc.
     96 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     97 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
     98 ; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
     99 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
    100   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
    101   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    102   ret void
    103 }
    104 
    105 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
        ; Returning addrspace(1) i32 case where the address is indexed by the work-item
        ; id and then advanced by 5 i32 elements. The CI run expects the addr64 buffer
        ; form with the constant as offset:20; the VI run expects flat_atomic_inc with
        ; no offset. There is no gfx900-specific check for the atomic in this test.
    106 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    107 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
    108 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    109 define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
    110   %id = call i32 @llvm.amdgcn.workitem.id.x()
    111   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    112   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
    113   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    114   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    115   store i32 %result, i32 addrspace(1)* %out.gep
    116   ret void
    117 }
    118 
    119 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
        ; Non-returning variant of the work-item-indexed addrspace(1) i32 case. The
        ; expected CI and VI instructions are the same forms as the returning test
        ; but must end without glc.
    120 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    121 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
    122 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    123 define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
    124   %id = call i32 @llvm.amdgcn.workitem.id.x()
    125   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    126   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    127   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
    128   ret void
    129 }
    130 
    131 @lds0 = addrspace(3) global [512 x i32] undef, align 4
    132 
        ; Indexes @lds0 with (work-item id + 2). The checks expect the id to be shifted
        ; left by 2 on its own and the constant +2 elements to show up as offset:8 on
        ; ds_inc_rtn_u32, even though the sum itself has a second use (it is stored to
        ; %add_use).
    133 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
    134 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
    135 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
    136 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    137   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    138   %idx.0 = add nsw i32 %tid.x, 2
    139   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
    140   %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
        ; Extra use of the index sum, separate from the address computation.
    141   store i32 %idx.0, i32 addrspace(1)* %add_use
    142   store i32 %val0, i32 addrspace(1)* %out
    143   ret void
    144 }
    145 
    146 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
        ; Returning 64-bit atomic inc through an addrspace(3) pointer. The operand 42
        ; is expected as a VGPR pair (low half 42, high half 0) feeding ds_inc_rtn_u64
        ; with no offset.
    147 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    148 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    149 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    150 define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    151   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
    152   store i64 %result, i64 addrspace(1)* %out
    153   ret void
    154 }
    155 
    156 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
        ; Returning addrspace(3) i64 case with the pointer advanced by 4 i64 elements.
        ; The checks expect the constant as offset:32 on ds_inc_rtn_u64.
    157 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    158 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    159 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
    160 define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    161   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    162   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
    163   store i64 %result, i64 addrspace(1)* %out
    164   ret void
    165 }
    166 
    167 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
        ; Non-returning addrspace(3) i64 case. The result is unused and the checks
        ; expect ds_inc_u64 (no _rtn suffix) with the 42/0 register pair as data.
    168 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    169 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    170 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    171 define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
    172   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
    173   ret void
    174 }
    175 
    176 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
        ; Non-returning addrspace(3) i64 case with the pointer advanced by 4 i64
        ; elements. The checks expect ds_inc_u64 ending in offset:32.
    177 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    178 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    179 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
    180 define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
    181   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    182   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
    183   ret void
    184 }
    185 
    186 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
        ; Returning 64-bit atomic inc on an addrspace(1) pointer. The CI/VI runs expect
        ; buffer_atomic_inc_x2 and the gfx900 run expects global_atomic_inc_x2, each
        ; taking the 42/0 register pair and ending in glc.
    187 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    188 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    189 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
    190 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
    191 define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    192   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
    193   store i64 %result, i64 addrspace(1)* %out
    194   ret void
    195 }
    196 
    197 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
        ; Returning addrspace(1) i64 case with the pointer advanced by 4 i64 elements.
        ; Both expected instruction forms carry the constant as offset:32 before glc.
    198 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    199 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    200 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
    201 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
    202 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    203   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    204   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    205   store i64 %result, i64 addrspace(1)* %out
    206   ret void
    207 }
    208 
    209 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
        ; Non-returning addrspace(1) i64 case. The result is unused, and the expected
        ; _x2 instructions must end right after their operands, i.e. without glc.
    210 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    211 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    212 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    213 
    214 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
    215 define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
    216   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
    217   ret void
    218 }
    219 
    220 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
        ; Non-returning addrspace(1) i64 case with the pointer advanced by 4 i64
        ; elements. The expected instructions end in offset:32 with no glc.
    221 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    222 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    223 ; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
    224 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
    225 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
    226   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    227   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    228   ret void
    229 }
    230 
    231 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
        ; Returning addrspace(1) i64 case where the address is indexed by the work-item
        ; id and then advanced by 5 i64 elements. The CI run expects an extra move of 0
        ; between the two halves of the operand pair and the addr64 buffer form with
        ; offset:40; the VI run expects flat_atomic_inc_x2 with no offset. There is no
        ; gfx900-specific check for the atomic in this test.
    232 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    233 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
    234 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    235 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
    236 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    237 define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    238   %id = call i32 @llvm.amdgcn.workitem.id.x()
    239   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    240   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
    241   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    242   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    243   store i64 %result, i64 addrspace(1)* %out.gep
    244   ret void
    245 }
    246 
    247 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
        ; Non-returning variant of the work-item-indexed addrspace(1) i64 case. The
        ; expected CI and VI instructions are the same forms as the returning test
        ; but must end without glc.
    248 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    249 ; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
    250 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    251 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
    252 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
    253 define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
    254   %id = call i32 @llvm.amdgcn.workitem.id.x()
    255   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    256   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    257   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
    258   ret void
    259 }
    260 
    261 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
        ; Returning 32-bit atomic inc on a default-address-space pointer. All three
        ; runs share one expectation, flat_atomic_inc with the operand 42 and glc.
    262 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    263 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    264 define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
    265   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
    266   store i32 %result, i32* %out
    267   ret void
    268 }
    269 
    270 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
        ; Returning flat i32 case with the pointer advanced by 4 i32 elements. The
        ; CI/VI runs expect flat_atomic_inc with no offset operand; the gfx900 run
        ; expects the constant on the instruction as offset:16.
    271 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    272 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    273 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
    274 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
    275   %gep = getelementptr i32, i32* %ptr, i32 4
    276   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    277   store i32 %result, i32* %out
    278   ret void
    279 }
    280 
    281 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
        ; Non-returning flat i32 case. The result is unused and the expected
        ; flat_atomic_inc has no destination register and no glc.
    282 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    283 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    284 define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
    285   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
    286   ret void
    287 }
    288 
    289 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
        ; Non-returning flat i32 case with the pointer advanced by 4 i32 elements.
        ; The CI/VI runs expect no offset operand; the gfx900 run expects offset:16.
        ; Neither form has glc.
    290 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    291 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    292 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
    293 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
    294   %gep = getelementptr i32, i32* %ptr, i32 4
    295   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    296   ret void
    297 }
    298 
    299 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
        ; Returning flat i32 case where the address is indexed by the work-item id and
        ; then advanced by 5 i32 elements. The CI/VI runs expect no offset operand; the
        ; gfx900 run expects the constant as offset:20.
    300 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    301 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
    302 ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
    303 define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
    304   %id = call i32 @llvm.amdgcn.workitem.id.x()
    305   %gep.tid = getelementptr i32, i32* %ptr, i32 %id
    306   %out.gep = getelementptr i32, i32* %out, i32 %id
    307   %gep = getelementptr i32, i32* %gep.tid, i32 5
    308   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    309   store i32 %result, i32* %out.gep
    310   ret void
    311 }
    312 
    313 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
        ; Non-returning variant of the work-item-indexed flat i32 case. The expected
        ; instructions are the same forms as the returning test, without a destination
        ; register and without glc.
    314 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    315 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
    316 ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
    317 define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
    318   %id = call i32 @llvm.amdgcn.workitem.id.x()
    319   %gep.tid = getelementptr i32, i32* %ptr, i32 %id
    320   %gep = getelementptr i32, i32* %gep.tid, i32 5
    321   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
    322   ret void
    323 }
    324 
    325 @lds1 = addrspace(3) global [512 x i64] undef, align 8
    326 
        ; 64-bit counterpart of the @lds0 indexing test. @lds1 is indexed with
        ; (work-item id + 2); the checks expect the id to be shifted left by 3 on its
        ; own and the constant +2 elements to show up as offset:16 on ds_inc_rtn_u64,
        ; even though the sum itself has a second use (it is stored to %add_use).
    327 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
    328 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
    329 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
    330 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    331   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    332   %idx.0 = add nsw i32 %tid.x, 2
    333   %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
    334   %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
        ; Extra use of the index sum, separate from the address computation.
    335   store i32 %idx.0, i32 addrspace(1)* %add_use
    336   store i64 %val0, i64 addrspace(1)* %out
    337   ret void
    338 }
    339 
    340 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
        ; Returning 64-bit atomic inc on a default-address-space pointer. All three
        ; runs share one expectation, flat_atomic_inc_x2 taking the 42/0 register pair
        ; and ending in glc.
    341 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    342 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    343 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    344 define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
    345   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
    346   store i64 %result, i64* %out
    347   ret void
    348 }
    349 
    350 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
        ; Returning flat i64 case with the pointer advanced by 4 i64 elements. The
        ; CI/VI runs expect flat_atomic_inc_x2 with no offset operand; the gfx900 run
        ; expects the constant on the instruction as offset:32.
    351 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    352 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    353 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    354 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
    355 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
    356   %gep = getelementptr i64, i64* %ptr, i32 4
    357   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    358   store i64 %result, i64* %out
    359   ret void
    360 }
    361 
    362 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
        ; Non-returning flat i64 case. The result is unused and the expected
        ; flat_atomic_inc_x2 line must end right after the data register pair.
    363 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    364 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    365 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    366 define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
    367   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
    368   ret void
    369 }
    370 
    371 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
        ; Non-returning flat i64 case with the pointer advanced by 4 i64 elements.
        ; The CI/VI runs expect no offset operand; the gfx900 run expects offset:32.
        ; Neither form has glc.
    372 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    373 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    374 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    375 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
    376 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
    377   %gep = getelementptr i64, i64* %ptr, i32 4
    378   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    379   ret void
    380 }
    381 
    382 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
        ; Returning flat i64 case where the address is indexed by the work-item id and
        ; then advanced by 5 i64 elements. The CI/VI runs expect no offset operand; the
        ; gfx900 run expects the constant as offset:40.
    383 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    384 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    385 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
    386 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
    387 define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
    388   %id = call i32 @llvm.amdgcn.workitem.id.x()
    389   %gep.tid = getelementptr i64, i64* %ptr, i32 %id
    390   %out.gep = getelementptr i64, i64* %out, i32 %id
    391   %gep = getelementptr i64, i64* %gep.tid, i32 5
    392   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    393   store i64 %result, i64* %out.gep
    394   ret void
    395 }
    396 
    397 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
        ; Non-returning variant of the work-item-indexed flat i64 case. The expected
        ; instructions are the same forms as the returning test, without a destination
        ; register pair and without glc.
    398 ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    399 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    400 ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
    401 ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
    402 define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
    403   %id = call i32 @llvm.amdgcn.workitem.id.x()
    404   %gep.tid = getelementptr i64, i64* %ptr, i32 %id
    405   %gep = getelementptr i64, i64* %gep.tid, i32 5
    406   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
    407   ret void
    408 }
    409 
    410 ; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
        ; Two textually identical returning atomics on the same addrspace(3) pointer.
        ; The checks expect two separate ds_inc_rtn_u32 instructions sharing the same
        ; register for 42, i.e. the second call must not be folded into the first.
    411 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    412 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
    413 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
    414 define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
    415   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
    416   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
    417 
        ; Each result is stored to its own output so both calls stay live.
    418   store i32 %result0, i32 addrspace(1)* %out0
    419   store i32 %result1, i32 addrspace(1)* %out1
    420   ret void
    421 }
    422 
    423 attributes #0 = { nounwind }
        ; Group #0 above is attached to the kernels that do not spell out nounwind
        ; inline. Group #1 is used by the workitem.id.x declaration and its tail-call
        ; sites; group #2 is used by the six atomic.inc declarations.
    424 attributes #1 = { nounwind readnone }
    425 attributes #2 = { nounwind argmemonly }
    426