; Checks where s_waitcnt vmcnt(0) and buffer_wbinvl1_vol appear around
; flat_atomic_cmpswap for cmpxchg, per syncscope and memory ordering pair.
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
      3 
      4 ; GCN-LABEL: {{^}}system_monotonic_monotonic:
      5 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
      6 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
      7 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
      8 ; GCN-NOT:   buffer_wbinvl1_vol
      9 define amdgpu_kernel void @system_monotonic_monotonic(
     10     i32* %out, i32 %in, i32 %old) {
     11 entry:
     12   %gep = getelementptr i32, i32* %out, i32 4
     13   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
     14   ret void
     15 }
     16 
     17 ; GCN-LABEL: {{^}}system_acquire_monotonic:
     18 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
     19 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     20 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     21 ; GFX8-NEXT:  buffer_wbinvl1_vol
     22 define amdgpu_kernel void @system_acquire_monotonic(
     23     i32* %out, i32 %in, i32 %old) {
     24 entry:
     25   %gep = getelementptr i32, i32* %out, i32 4
     26   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
     27   ret void
     28 }
     29 
     30 ; GCN-LABEL: {{^}}system_release_monotonic:
     31 ; GCN:        s_waitcnt vmcnt(0){{$}}
     32 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     33 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
     34 ; GCN-NOT:    buffer_wbinvl1_vol
     35 define amdgpu_kernel void @system_release_monotonic(
     36     i32* %out, i32 %in, i32 %old) {
     37 entry:
     38   %gep = getelementptr i32, i32* %out, i32 4
     39   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
     40   ret void
     41 }
     42 
     43 ; GCN-LABEL: {{^}}system_acq_rel_monotonic:
     44 ; GCN:        s_waitcnt vmcnt(0){{$}}
     45 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     46 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     47 ; GFX8-NEXT:  buffer_wbinvl1_vol
     48 define amdgpu_kernel void @system_acq_rel_monotonic(
     49     i32* %out, i32 %in, i32 %old) {
     50 entry:
     51   %gep = getelementptr i32, i32* %out, i32 4
     52   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
     53   ret void
     54 }
     55 
     56 ; GCN-LABEL: {{^}}system_seq_cst_monotonic:
     57 ; GCN:        s_waitcnt vmcnt(0){{$}}
     58 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     59 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     60 ; GFX8-NEXT:  buffer_wbinvl1_vol
     61 define amdgpu_kernel void @system_seq_cst_monotonic(
     62     i32* %out, i32 %in, i32 %old) {
     63 entry:
     64   %gep = getelementptr i32, i32* %out, i32 4
     65   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
     66   ret void
     67 }
     68 
     69 ; GCN-LABEL: {{^}}system_acquire_acquire:
     70 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
     71 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     72 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     73 ; GFX8-NEXT:  buffer_wbinvl1_vol
     74 define amdgpu_kernel void @system_acquire_acquire(
     75     i32* %out, i32 %in, i32 %old) {
     76 entry:
     77   %gep = getelementptr i32, i32* %out, i32 4
     78   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
     79   ret void
     80 }
     81 
     82 ; GCN-LABEL: {{^}}system_release_acquire:
     83 ; GCN:        s_waitcnt vmcnt(0){{$}}
     84 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     85 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     86 ; GFX8-NEXT:  buffer_wbinvl1_vol
     87 define amdgpu_kernel void @system_release_acquire(
     88     i32* %out, i32 %in, i32 %old) {
     89 entry:
     90   %gep = getelementptr i32, i32* %out, i32 4
     91   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
     92   ret void
     93 }
     94 
     95 ; GCN-LABEL: {{^}}system_acq_rel_acquire:
     96 ; GCN:        s_waitcnt vmcnt(0){{$}}
     97 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
     98 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     99 ; GFX8-NEXT:  buffer_wbinvl1_vol
    100 define amdgpu_kernel void @system_acq_rel_acquire(
    101     i32* %out, i32 %in, i32 %old) {
    102 entry:
    103   %gep = getelementptr i32, i32* %out, i32 4
    104   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
    105   ret void
    106 }
    107 
    108 ; GCN-LABEL: {{^}}system_seq_cst_acquire:
    109 ; GCN:        s_waitcnt vmcnt(0){{$}}
    110 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    111 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    112 ; GFX8-NEXT:  buffer_wbinvl1_vol
    113 define amdgpu_kernel void @system_seq_cst_acquire(
    114     i32* %out, i32 %in, i32 %old) {
    115 entry:
    116   %gep = getelementptr i32, i32* %out, i32 4
    117   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
    118   ret void
    119 }
    120 
    121 ; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
    122 ; GCN:        s_waitcnt vmcnt(0){{$}}
    123 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    124 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    125 ; GFX8-NEXT:  buffer_wbinvl1_vol
    126 define amdgpu_kernel void @system_seq_cst_seq_cst(
    127     i32* %out, i32 %in, i32 %old) {
    128 entry:
    129   %gep = getelementptr i32, i32* %out, i32 4
    130   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
    131   ret void
    132 }
    133 
    134 ; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
    135 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    136 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    137 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    138 ; GCN-NOT:   buffer_wbinvl1_vol
    139 define amdgpu_kernel void @singlethread_monotonic_monotonic(
    140     i32* %out, i32 %in, i32 %old) {
    141 entry:
    142   %gep = getelementptr i32, i32* %out, i32 4
    143   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
    144   ret void
    145 }
    146 
    147 ; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
    148 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    149 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    150 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    151 ; GCN-NOT:   buffer_wbinvl1_vol
    152 define amdgpu_kernel void @singlethread_acquire_monotonic(
    153     i32* %out, i32 %in, i32 %old) {
    154 entry:
    155   %gep = getelementptr i32, i32* %out, i32 4
    156   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
    157   ret void
    158 }
    159 
    160 ; GCN-LABEL: {{^}}singlethread_release_monotonic:
    161 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    162 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    163 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    164 ; GCN-NOT:   buffer_wbinvl1_vol
    165 define amdgpu_kernel void @singlethread_release_monotonic(
    166     i32* %out, i32 %in, i32 %old) {
    167 entry:
    168   %gep = getelementptr i32, i32* %out, i32 4
    169   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
    170   ret void
    171 }
    172 
    173 ; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
    174 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    175 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    176 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    177 ; GCN-NOT:   buffer_wbinvl1_vol
    178 define amdgpu_kernel void @singlethread_acq_rel_monotonic(
    179     i32* %out, i32 %in, i32 %old) {
    180 entry:
    181   %gep = getelementptr i32, i32* %out, i32 4
    182   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
    183   ret void
    184 }
    185 
    186 ; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
    187 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    188 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    189 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    190 ; GCN-NOT:   buffer_wbinvl1_vol
    191 define amdgpu_kernel void @singlethread_seq_cst_monotonic(
    192     i32* %out, i32 %in, i32 %old) {
    193 entry:
    194   %gep = getelementptr i32, i32* %out, i32 4
    195   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
    196   ret void
    197 }
    198 
    199 ; GCN-LABEL: {{^}}singlethread_acquire_acquire:
    200 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    201 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    202 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    203 ; GCN-NOT:   buffer_wbinvl1_vol
    204 define amdgpu_kernel void @singlethread_acquire_acquire(
    205     i32* %out, i32 %in, i32 %old) {
    206 entry:
    207   %gep = getelementptr i32, i32* %out, i32 4
    208   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
    209   ret void
    210 }
    211 
    212 ; GCN-LABEL: {{^}}singlethread_release_acquire:
    213 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    214 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    215 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    216 ; GCN-NOT:   buffer_wbinvl1_vol
    217 define amdgpu_kernel void @singlethread_release_acquire(
    218     i32* %out, i32 %in, i32 %old) {
    219 entry:
    220   %gep = getelementptr i32, i32* %out, i32 4
    221   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
    222   ret void
    223 }
    224 
    225 ; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
    226 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    227 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    228 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    229 ; GCN-NOT:   buffer_wbinvl1_vol
    230 define amdgpu_kernel void @singlethread_acq_rel_acquire(
    231     i32* %out, i32 %in, i32 %old) {
    232 entry:
    233   %gep = getelementptr i32, i32* %out, i32 4
    234   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
    235   ret void
    236 }
    237 
    238 ; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
    239 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    240 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    241 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    242 ; GCN-NOT:   buffer_wbinvl1_vol
    243 define amdgpu_kernel void @singlethread_seq_cst_acquire(
    244     i32* %out, i32 %in, i32 %old) {
    245 entry:
    246   %gep = getelementptr i32, i32* %out, i32 4
    247   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
    248   ret void
    249 }
    250 
    251 ; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
    252 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    253 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    254 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    255 ; GCN-NOT:   buffer_wbinvl1_vol
    256 define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
    257     i32* %out, i32 %in, i32 %old) {
    258 entry:
    259   %gep = getelementptr i32, i32* %out, i32 4
    260   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
    261   ret void
    262 }
    263 
    264 ; GCN-LABEL: {{^}}agent_monotonic_monotonic:
    265 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    266 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    267 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    268 ; GCN-NOT:   buffer_wbinvl1_vol
    269 define amdgpu_kernel void @agent_monotonic_monotonic(
    270     i32* %out, i32 %in, i32 %old) {
    271 entry:
    272   %gep = getelementptr i32, i32* %out, i32 4
    273   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
    274   ret void
    275 }
    276 
    277 ; GCN-LABEL: {{^}}agent_acquire_monotonic:
    278 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    279 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    280 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    281 ; GFX8-NEXT:  buffer_wbinvl1_vol
    282 define amdgpu_kernel void @agent_acquire_monotonic(
    283     i32* %out, i32 %in, i32 %old) {
    284 entry:
    285   %gep = getelementptr i32, i32* %out, i32 4
    286   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
    287   ret void
    288 }
    289 
    290 ; GCN-LABEL: {{^}}agent_release_monotonic:
    291 ; GCN:        s_waitcnt vmcnt(0){{$}}
    292 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    293 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    294 ; GCN-NOT:    buffer_wbinvl1_vol
    295 define amdgpu_kernel void @agent_release_monotonic(
    296     i32* %out, i32 %in, i32 %old) {
    297 entry:
    298   %gep = getelementptr i32, i32* %out, i32 4
    299   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
    300   ret void
    301 }
    302 
    303 ; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
    304 ; GCN:        s_waitcnt vmcnt(0){{$}}
    305 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    306 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    307 ; GFX8-NEXT:  buffer_wbinvl1_vol
    308 define amdgpu_kernel void @agent_acq_rel_monotonic(
    309     i32* %out, i32 %in, i32 %old) {
    310 entry:
    311   %gep = getelementptr i32, i32* %out, i32 4
    312   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
    313   ret void
    314 }
    315 
    316 ; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
    317 ; GCN:        s_waitcnt vmcnt(0){{$}}
    318 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    319 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    320 ; GFX8-NEXT:  buffer_wbinvl1_vol
    321 define amdgpu_kernel void @agent_seq_cst_monotonic(
    322     i32* %out, i32 %in, i32 %old) {
    323 entry:
    324   %gep = getelementptr i32, i32* %out, i32 4
    325   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
    326   ret void
    327 }
    328 
    329 ; GCN-LABEL: {{^}}agent_acquire_acquire:
    330 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    331 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    332 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    333 ; GFX8-NEXT:  buffer_wbinvl1_vol
    334 define amdgpu_kernel void @agent_acquire_acquire(
    335     i32* %out, i32 %in, i32 %old) {
    336 entry:
    337   %gep = getelementptr i32, i32* %out, i32 4
    338   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
    339   ret void
    340 }
    341 
    342 ; GCN-LABEL: {{^}}agent_release_acquire:
    343 ; GCN:        s_waitcnt vmcnt(0){{$}}
    344 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    345 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    346 ; GFX8-NEXT:  buffer_wbinvl1_vol
    347 define amdgpu_kernel void @agent_release_acquire(
    348     i32* %out, i32 %in, i32 %old) {
    349 entry:
    350   %gep = getelementptr i32, i32* %out, i32 4
    351   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
    352   ret void
    353 }
    354 
    355 ; GCN-LABEL: {{^}}agent_acq_rel_acquire:
    356 ; GCN:        s_waitcnt vmcnt(0){{$}}
    357 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    358 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    359 ; GFX8-NEXT:  buffer_wbinvl1_vol
    360 define amdgpu_kernel void @agent_acq_rel_acquire(
    361     i32* %out, i32 %in, i32 %old) {
    362 entry:
    363   %gep = getelementptr i32, i32* %out, i32 4
    364   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
    365   ret void
    366 }
    367 
    368 ; GCN-LABEL: {{^}}agent_seq_cst_acquire:
    369 ; GCN:        s_waitcnt vmcnt(0){{$}}
    370 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    371 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    372 ; GFX8-NEXT:  buffer_wbinvl1_vol
    373 define amdgpu_kernel void @agent_seq_cst_acquire(
    374     i32* %out, i32 %in, i32 %old) {
    375 entry:
    376   %gep = getelementptr i32, i32* %out, i32 4
    377   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
    378   ret void
    379 }
    380 
    381 ; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
    382 ; GCN:        s_waitcnt vmcnt(0){{$}}
    383 ; GCN-NEXT:   flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    384 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    385 ; GFX8-NEXT:  buffer_wbinvl1_vol
    386 define amdgpu_kernel void @agent_seq_cst_seq_cst(
    387     i32* %out, i32 %in, i32 %old) {
    388 entry:
    389   %gep = getelementptr i32, i32* %out, i32 4
    390   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
    391   ret void
    392 }
    393 
    394 ; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
    395 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    396 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    397 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    398 ; GCN-NOT:   buffer_wbinvl1_vol
    399 define amdgpu_kernel void @workgroup_monotonic_monotonic(
    400     i32* %out, i32 %in, i32 %old) {
    401 entry:
    402   %gep = getelementptr i32, i32* %out, i32 4
    403   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
    404   ret void
    405 }
    406 
    407 ; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
    408 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    409 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    410 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    411 ; GFX8-NOT:   buffer_wbinvl1_vol
    412 define amdgpu_kernel void @workgroup_acquire_monotonic(
    413     i32* %out, i32 %in, i32 %old) {
    414 entry:
    415   %gep = getelementptr i32, i32* %out, i32 4
    416   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
    417   ret void
    418 }
    419 
    420 ; GCN-LABEL: {{^}}workgroup_release_monotonic:
    421 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    422 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    423 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    424 ; GCN-NOT:    buffer_wbinvl1_vol
    425 define amdgpu_kernel void @workgroup_release_monotonic(
    426     i32* %out, i32 %in, i32 %old) {
    427 entry:
    428   %gep = getelementptr i32, i32* %out, i32 4
    429   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
    430   ret void
    431 }
    432 
    433 ; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
    434 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    435 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    436 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    437 ; GFX8-NOT:   buffer_wbinvl1_vol
    438 define amdgpu_kernel void @workgroup_acq_rel_monotonic(
    439     i32* %out, i32 %in, i32 %old) {
    440 entry:
    441   %gep = getelementptr i32, i32* %out, i32 4
    442   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
    443   ret void
    444 }
    445 
    446 ; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
    447 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    448 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    449 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    450 ; GFX8-NOT:   buffer_wbinvl1_vol
    451 define amdgpu_kernel void @workgroup_seq_cst_monotonic(
    452     i32* %out, i32 %in, i32 %old) {
    453 entry:
    454   %gep = getelementptr i32, i32* %out, i32 4
    455   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
    456   ret void
    457 }
    458 
    459 ; GCN-LABEL: {{^}}workgroup_acquire_acquire:
    460 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    461 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    462 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    463 ; GFX8-NOT:   buffer_wbinvl1_vol
    464 define amdgpu_kernel void @workgroup_acquire_acquire(
    465     i32* %out, i32 %in, i32 %old) {
    466 entry:
    467   %gep = getelementptr i32, i32* %out, i32 4
    468   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
    469   ret void
    470 }
    471 
    472 ; GCN-LABEL: {{^}}workgroup_release_acquire:
    473 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    474 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    475 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    476 ; GFX8-NOT:   buffer_wbinvl1_vol
    477 define amdgpu_kernel void @workgroup_release_acquire(
    478     i32* %out, i32 %in, i32 %old) {
    479 entry:
    480   %gep = getelementptr i32, i32* %out, i32 4
    481   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
    482   ret void
    483 }
    484 
    485 ; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
    486 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    487 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    488 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    489 ; GFX8-NOT:   buffer_wbinvl1_vol
    490 define amdgpu_kernel void @workgroup_acq_rel_acquire(
    491     i32* %out, i32 %in, i32 %old) {
    492 entry:
    493   %gep = getelementptr i32, i32* %out, i32 4
    494   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
    495   ret void
    496 }
    497 
    498 ; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
    499 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    500 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    501 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    502 ; GFX8-NOT:   buffer_wbinvl1_vol
    503 define amdgpu_kernel void @workgroup_seq_cst_acquire(
    504     i32* %out, i32 %in, i32 %old) {
    505 entry:
    506   %gep = getelementptr i32, i32* %out, i32 4
    507   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
    508   ret void
    509 }
    510 
    511 ; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
    512 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    513 ; GCN:        flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    514 ; GFX8-NOT:   s_waitcnt vmcnt(0){{$}}
    515 ; GFX8-NOT:   buffer_wbinvl1_vol
    516 define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
    517     i32* %out, i32 %in, i32 %old) {
    518 entry:
    519   %gep = getelementptr i32, i32* %out, i32 4
    520   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
    521   ret void
    522 }
    523 
    524 ; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
    525 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    526 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    527 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    528 ; GCN-NOT:   buffer_wbinvl1_vol
    529 define amdgpu_kernel void @wavefront_monotonic_monotonic(
    530     i32* %out, i32 %in, i32 %old) {
    531 entry:
    532   %gep = getelementptr i32, i32* %out, i32 4
    533   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
    534   ret void
    535 }
    536 
    537 ; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
    538 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    539 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    540 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    541 ; GCN-NOT:   buffer_wbinvl1_vol
    542 define amdgpu_kernel void @wavefront_acquire_monotonic(
    543     i32* %out, i32 %in, i32 %old) {
    544 entry:
    545   %gep = getelementptr i32, i32* %out, i32 4
    546   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
    547   ret void
    548 }
    549 
    550 ; GCN-LABEL: {{^}}wavefront_release_monotonic:
    551 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    552 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    553 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    554 ; GCN-NOT:   buffer_wbinvl1_vol
    555 define amdgpu_kernel void @wavefront_release_monotonic(
    556     i32* %out, i32 %in, i32 %old) {
    557 entry:
    558   %gep = getelementptr i32, i32* %out, i32 4
    559   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
    560   ret void
    561 }
    562 
    563 ; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
    564 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    565 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    566 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    567 ; GCN-NOT:   buffer_wbinvl1_vol
    568 define amdgpu_kernel void @wavefront_acq_rel_monotonic(
    569     i32* %out, i32 %in, i32 %old) {
    570 entry:
    571   %gep = getelementptr i32, i32* %out, i32 4
    572   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
    573   ret void
    574 }
    575 
    576 ; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
    577 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    578 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    579 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    580 ; GCN-NOT:   buffer_wbinvl1_vol
    581 define amdgpu_kernel void @wavefront_seq_cst_monotonic(
    582     i32* %out, i32 %in, i32 %old) {
    583 entry:
    584   %gep = getelementptr i32, i32* %out, i32 4
    585   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
    586   ret void
    587 }
    588 
    589 ; GCN-LABEL: {{^}}wavefront_acquire_acquire:
    590 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    591 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    592 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    593 ; GCN-NOT:   buffer_wbinvl1_vol
    594 define amdgpu_kernel void @wavefront_acquire_acquire(
    595     i32* %out, i32 %in, i32 %old) {
    596 entry:
    597   %gep = getelementptr i32, i32* %out, i32 4
    598   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
    599   ret void
    600 }
    601 
    602 ; GCN-LABEL: {{^}}wavefront_release_acquire:
    603 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    604 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    605 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    606 ; GCN-NOT:   buffer_wbinvl1_vol
    607 define amdgpu_kernel void @wavefront_release_acquire(
    608     i32* %out, i32 %in, i32 %old) {
    609 entry:
    610   %gep = getelementptr i32, i32* %out, i32 4
    611   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
    612   ret void
    613 }
    614 
    615 ; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
    616 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    617 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    618 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    619 ; GCN-NOT:   buffer_wbinvl1_vol
    620 define amdgpu_kernel void @wavefront_acq_rel_acquire(
    621     i32* %out, i32 %in, i32 %old) {
    622 entry:
    623   %gep = getelementptr i32, i32* %out, i32 4
    624   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
    625   ret void
    626 }
    627 
    628 ; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
    629 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    630 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    631 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    632 ; GCN-NOT:   buffer_wbinvl1_vol
    633 define amdgpu_kernel void @wavefront_seq_cst_acquire(
    634     i32* %out, i32 %in, i32 %old) {
    635 entry:
    636   %gep = getelementptr i32, i32* %out, i32 4
    637   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
    638   ret void
    639 }
    640 
    641 ; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
    642 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    643 ; GCN:       flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
    644 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    645 ; GCN-NOT:   buffer_wbinvl1_vol
    646 define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
    647     i32* %out, i32 %in, i32 %old) {
    648 entry:
    649   %gep = getelementptr i32, i32* %out, i32 4
    650   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
    651   ret void
    652 }
    653