; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Codegen checks for 32-bit atomicrmw / cmpxchg on plain (flat) i32 pointers.
;
; Each operation is exercised in up to eight shapes:
;   *_offset            constant GEP of 4 elements (16 bytes) off the pointer
;   *_ret_offset        same, and the returned value is stored to %out2
;   *_addr64_offset     variable i64 index plus the constant 4-element GEP
;   *_ret_addr64_offset same, and the returned value is stored to %out2
;   (no suffix)         the pointer argument is used directly
;   *_ret               direct pointer, returned value stored
;   *_addr64            variable i64 index only
;   *_ret_addr64        variable i64 index only, returned value stored
;
; The bonaire and tonga runs share one set of expectations: the instruction
; must end right after its register operands, i.e. no offset operand. The
; gfx900 run expects the constant byte offset to appear as an "offset:N"
; operand instead. Variants that use the returned value additionally expect
; the "glc" modifier and a flat_store_dword of the captured result register.

; ---- add ----

; GCN-LABEL: {{^}}atomic_add_i32_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; 1023 i32 elements = 4092 bytes: the largest constant offset this file
; expects the gfx900 run to place in the offset operand.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1023
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; One element further (1024 elements = 4096 bytes): every run, gfx900
; included, is expected to emit the instruction with no offset operand.
; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1024
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- and ----

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; The result register is captured with a one-or-more-digits pattern so the
; check does not depend on the value landing in a single-digit VGPR.
; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; Same one-or-more-digits capture as atomic_and_i32_ret_offset.
; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- sub ----

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- max (signed; expected as flat_atomic_smax) ----

; GCN-LABEL: {{^}}atomic_max_i32_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- umax ----

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- min (signed; expected as flat_atomic_smin) ----

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- umin ----

; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- or ----

; GCN-LABEL: {{^}}atomic_or_i32_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; ---- xchg (expected as flat_atomic_swap) ----

; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32:
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; CMP_SWAP
; The cmpswap checks expect the data operand as a VGPR range rather than a
; single register. In the returning variants the result is captured by
; register number only (v[[RET:...]]) and the loaded value is taken from
; field 0 of the cmpxchg result pair.

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

;
GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret: 839 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc 840 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 841 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) { 842 entry: 843 %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst 844 %flag = extractvalue { i32, i1 } %val, 0 845 store i32 %flag, i32* %out2 846 ret void 847 } 848 849 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: 850 ; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 851 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) { 852 entry: 853 %ptr = getelementptr i32, i32* %out, i64 %index 854 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst 855 ret void 856 } 857 858 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: 859 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 860 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] 861 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) { 862 entry: 863 %ptr = getelementptr i32, i32* %out, i64 %index 864 %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst 865 %flag = extractvalue { i32, i1 } %val, 0 866 store i32 %flag, i32* %out2 867 ret void 868 } 869 870 ; GCN-LABEL: {{^}}atomic_xor_i32_offset: 871 ; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} 872 ; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} 873 define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) { 874 entry: 875 %gep = getelementptr i32, i32* %out, i32 4 876 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 877 ret void 878 } 879 880 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: 881 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} 882 
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} 883 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 884 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { 885 entry: 886 %gep = getelementptr i32, i32* %out, i32 4 887 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 888 store i32 %val, i32* %out2 889 ret void 890 } 891 892 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: 893 ; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 894 ; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} 895 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { 896 entry: 897 %ptr = getelementptr i32, i32* %out, i64 %index 898 %gep = getelementptr i32, i32* %ptr, i32 4 899 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 900 ret void 901 } 902 903 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: 904 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 905 ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} 906 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 907 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { 908 entry: 909 %ptr = getelementptr i32, i32* %out, i64 %index 910 %gep = getelementptr i32, i32* %ptr, i32 4 911 %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst 912 store i32 %val, i32* %out2 913 ret void 914 } 915 916 ; GCN-LABEL: {{^}}atomic_xor_i32: 917 ; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} 918 define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) { 919 entry: 920 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst 921 ret void 922 } 923 924 ; GCN-LABEL: {{^}}atomic_xor_i32_ret: 925 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} 926 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, 
[[RET]] 927 define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) { 928 entry: 929 %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst 930 store i32 %val, i32* %out2 931 ret void 932 } 933 934 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64: 935 ; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 936 define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) { 937 entry: 938 %ptr = getelementptr i32, i32* %out, i64 %index 939 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst 940 ret void 941 } 942 943 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: 944 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 945 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 946 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { 947 entry: 948 %ptr = getelementptr i32, i32* %out, i64 %index 949 %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst 950 store i32 %val, i32* %out2 951 ret void 952 } 953 954 ; GCN-LABEL: {{^}}atomic_load_i32_offset: 955 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 956 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} 957 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 958 define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) { 959 entry: 960 %gep = getelementptr i32, i32* %in, i32 4 961 %val = load atomic i32, i32* %gep seq_cst, align 4 962 store i32 %val, i32* %out 963 ret void 964 } 965 966 ; GCN-LABEL: {{^}}atomic_load_i32: 967 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc 968 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 969 define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) { 970 entry: 971 %val = load atomic i32, i32* %in seq_cst, align 4 972 store i32 %val, i32* %out 973 ret void 974 } 975 976 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: 977 ; CIVI: flat_load_dword 
[[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 978 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} 979 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 980 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) { 981 entry: 982 %ptr = getelementptr i32, i32* %in, i64 %index 983 %gep = getelementptr i32, i32* %ptr, i32 4 984 %val = load atomic i32, i32* %gep seq_cst, align 4 985 store i32 %val, i32* %out 986 ret void 987 } 988 989 ; GCN-LABEL: {{^}}atomic_load_i32_addr64: 990 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 991 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 992 define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) { 993 entry: 994 %ptr = getelementptr i32, i32* %in, i64 %index 995 %val = load atomic i32, i32* %ptr seq_cst, align 4 996 store i32 %val, i32* %out 997 ret void 998 } 999 1000 ; GCN-LABEL: {{^}}atomic_store_i32_offset: 1001 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1002 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1003 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) { 1004 entry: 1005 %gep = getelementptr i32, i32* %out, i32 4 1006 store atomic i32 %in, i32* %gep seq_cst, align 4 1007 ret void 1008 } 1009 1010 ; GCN-LABEL: {{^}}atomic_store_i32: 1011 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1012 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) { 1013 entry: 1014 store atomic i32 %in, i32* %out seq_cst, align 4 1015 ret void 1016 } 1017 1018 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: 1019 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1020 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} 1021 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) { 1022 entry: 1023 %ptr = getelementptr i32, i32* 
%out, i64 %index 1024 %gep = getelementptr i32, i32* %ptr, i32 4 1025 store atomic i32 %in, i32* %gep seq_cst, align 4 1026 ret void 1027 } 1028 1029 ; GCN-LABEL: {{^}}atomic_store_i32_addr64: 1030 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} 1031 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) { 1032 entry: 1033 %ptr = getelementptr i32, i32* %out, i64 %index 1034 store atomic i32 %in, i32* %ptr seq_cst, align 4 1035 ret void 1036 } 1037