; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Codegen test for the llvm.amdgcn.atomic.inc intrinsics (i32 and i64) applied
; to addrspace(3), addrspace(1) and default-address-space pointers.
;
; Conventions used by the kernels below:
;   * "ret" kernels store the intrinsic result; "noret" kernels leave it unused.
;   * "offset" kernels index the pointer by a constant element count so the
;     checks can pin down whether the byte offset is folded into the instruction
;     on each of the three targets named in the run lines above.
;   * "addr64" kernels additionally index the pointer by the workitem id.
;   * The data operand is 42 everywhere except the two shl_base kernels, which
;     pass 9.

declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; ---------------------------------------------------------------------------
; addrspace(3), i32
; bonaire and tonga are expected to write m0 somewhere in the kernel; gfx900
; must not mention m0 before the DS instruction.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Element index 4 of an i32 pointer is expected to appear as offset:16.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The data register must hold the full immediate 42 passed to the intrinsic;
; matching only a leading "4" would also accept unrelated constants.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(1), i32
; bonaire and tonga share the buffer_atomic_inc expectation; gfx900 expects
; global_atomic_inc. "glc" is expected only when the result is used.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Address is ptr[workitem.id.x + 5]. bonaire expects the addr64 buffer form with
; the constant part as offset:20; tonga expects a flat atomic with no offset.
; NOTE(review): there is no gfx900-specific instruction check for this kernel.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; NOTE(review): there is no gfx900-specific instruction check for this kernel.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

@lds0 = addrspace(3) global [512 x i32] undef, align 4

; The index (tid + 2) has a second use via %add_use. The checks expect the
; shift by 2 to be applied to a register and the constant to appear as offset:8.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(3), i64
; The 64-bit data operand is expected as a register pair: low half 42, high
; half 0. Element index 4 of an i64 pointer is expected as offset:32.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(1), i64
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Address is ptr[workitem.id.x + 5]; bonaire expects the constant as offset:40.
; bonaire additionally expects one more zero-materializing v_mov between the
; low and high halves of the data pair.
; NOTE(review): there is no gfx900-specific instruction check for this kernel.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; NOTE(review): there is no gfx900-specific instruction check for this kernel.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; default address space, i32
; For the offset kernels, bonaire and tonga expect no offset on the flat
; instruction, while gfx900 expects the constant byte offset on it.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %out.gep = getelementptr i32, i32* %out, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; 64-bit counterpart of atomic_inc_shl_base_lds_0_i32: shift by 3, offset:16.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; default address space, i64
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %out.gep = getelementptr i64, i64* %out, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Two identical intrinsic calls on the same pointer: both DS instructions must
; still be present in the output, i.e. the second call is not merged into the
; first.
; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)

  store i32 %result0, i32 addrspace(1)* %out0
  store i32 %result1, i32 addrspace(1)* %out1
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }