; Checks the instructions emitted around a volatile `cmpxchg` on an i32
; pointer in the default address space for gfx803. Both invocations below
; enable the same two check prefixes (GCN and GFX8), so every directive in
; this file applies to both of them.
;
; Kernels are named <scope>_<success ordering>_<failure ordering>. Each one
; offsets %out by four i32 elements and performs a single cmpxchg on the
; result; the returned value is unused.
;
; NOTE(review): the first triple ends in an empty OS component
; ("amdgcn-amd-"). Presumably this is intentional so the test also covers a
; non-amdhsa target; confirm before changing it.
;
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s

;------------------------------------------------------------------------------
; No syncscope operand (kernels prefixed "system").
; Expected shape: orderings that include release get an s_waitcnt vmcnt(0)
; immediately before the cmpswap; orderings that include acquire get an
; s_waitcnt vmcnt(0) followed by buffer_wbinvl1_vol immediately after it.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}system_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}system_release_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acq_rel_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}system_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
  ret void
}

; GCN-LABEL: {{^}}system_release_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
  ret void
}

; GCN-LABEL: {{^}}system_acq_rel_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

;------------------------------------------------------------------------------
; syncscope("singlethread").
; Expected shape for every ordering pair: the cmpswap alone, with no
; s_waitcnt vmcnt(0) on either side and no buffer_wbinvl1_vol after it.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_release_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}singlethread_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_release_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
  ret void
}

;------------------------------------------------------------------------------
; syncscope("agent").
; Expected shape mirrors the kernels without a syncscope operand: a wait
; before the cmpswap when the ordering includes release, and a wait plus
; buffer_wbinvl1_vol after it when the ordering includes acquire.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}agent_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_release_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}agent_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}agent_release_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
  ret void
}

; GCN-LABEL: {{^}}agent_acq_rel_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_acquire:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
; GCN:       s_waitcnt vmcnt(0){{$}}
; GCN-NEXT:  flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NEXT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
  ret void
}

;------------------------------------------------------------------------------
; syncscope("workgroup").
; Expected shape for every ordering pair: the cmpswap alone, with no
; s_waitcnt vmcnt(0) around it and no buffer_wbinvl1_vol after it. Some of the
; negative checks in this group use the GFX8 prefix instead of the GCN prefix;
; both prefixes are enabled by both invocations at the top of the file.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_release_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}workgroup_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_release_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GFX8-NOT:  s_waitcnt vmcnt(0){{$}}
; GFX8-NOT:  buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
  ret void
}

;------------------------------------------------------------------------------
; syncscope("wavefront").
; Expected shape for every ordering pair: the cmpswap alone, with no
; s_waitcnt vmcnt(0) on either side and no buffer_wbinvl1_vol after it.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_release_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
  ret void
}

; GCN-LABEL: {{^}}wavefront_acquire_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_release_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_acquire(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
; GCN-NOT:   buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
    i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
  ret void
}