Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
      4 
      5 ; GCN-LABEL: {{^}}atomic_add_i32_offset:
      6 ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
      7 ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
      8 define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
      9 body:
     10   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
     11   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst ; result is not used
     12   ret void
     13 }
     14 
     15 ; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
     16 ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
     17 ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
     18 define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
     19 body:
     20   %addr = getelementptr i32, i32* %out, i32 1023 ; &out[1023], i.e. 4092 bytes past %out
     21   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst ; result is not used
     22   ret void
     23 }
     24 
     25 ; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
     26 ; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
     27 define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
     28 body:
     29   %addr = getelementptr i32, i32* %out, i32 1024 ; &out[1024], i.e. 4096 bytes; no offset operand is expected on any run
     30   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst ; result is not used
     31   ret void
     32 }
     33 
     34 ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
     35 ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
     36 ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
     37 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
     38 define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
     39 body:
     40   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
     41   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
     42   store i32 %old, i32* %out2 ; keeps the atomic's result live
     43   ret void
     44 }
     45 
     46 ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
     47 ; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
     48 ; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
     49 define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
     50 body:
     51   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
     52   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
     53   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst ; result is not used
     54   ret void
     55 }
     56 
     57 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
     58 ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
     59 ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
     60 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
     61 define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
     62 body:
     63   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
     64   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
     65   %old = atomicrmw volatile add i32* %addr, i32 %in seq_cst
     66   store i32 %old, i32* %out2 ; keeps the atomic's result live
     67   ret void
     68 }
     69 
     70 ; GCN-LABEL: {{^}}atomic_add_i32:
     71 ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
     72 define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
     73 body:
     74   %old = atomicrmw volatile add i32* %out, i32 %in seq_cst ; directly on %out; result is not used
     75   ret void
     76 }
     77 
     78 ; GCN-LABEL: {{^}}atomic_add_i32_ret:
     79 ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
     80 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
     81 define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
     82 body:
     83   %old = atomicrmw volatile add i32* %out, i32 %in seq_cst ; directly on %out
     84   store i32 %old, i32* %out2 ; keeps the atomic's result live
     85   ret void
     86 }
     87 
     88 ; GCN-LABEL: {{^}}atomic_add_i32_addr64:
     89 ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
     90 define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
     91 body:
     92   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
     93   %old = atomicrmw volatile add i32* %elem, i32 %in seq_cst ; result is not used
     94   ret void
     95 }
     96 
     97 ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
     98 ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
     99 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    100 define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    101 body:
    102   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    103   %old = atomicrmw volatile add i32* %elem, i32 %in seq_cst
    104   store i32 %old, i32* %out2 ; keeps the atomic's result live
    105   ret void
    106 }
    107 
    108 ; GCN-LABEL: {{^}}atomic_and_i32_offset:
    109 ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    110 ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    111 define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
    112 body:
    113   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    114   %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst ; result is not used
    115   ret void
    116 }
    117 
    118 ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
    119 ; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    120 ; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    121 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Returning atomic 'and' on &out[4]; the old value is stored to %out2.
        ; RET is captured with v[0-9]+ (as in the sibling tests) so the check does
        ; not depend on the result landing in a single-digit VGPR.
    122 define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    123 entry:
    124   %gep = getelementptr i32, i32* %out, i32 4 ; 4 x i32 = 16 bytes past %out
    125   %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
    126   store i32 %val, i32* %out2 ; keeps the atomic's result live
    127   ret void
    128 }
    129 
    130 ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
    131 ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    132 ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    133 define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    134 body:
    135   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    136   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    137   %old = atomicrmw volatile and i32* %addr, i32 %in seq_cst ; result is not used
    138   ret void
    139 }
    140 
    141 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
    142 ; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    143 ; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    144 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Returning atomic 'and' on &out[index + 4]; the old value is stored to %out2.
        ; RET is captured with v[0-9]+ (as in the sibling tests) so the check does
        ; not depend on the result landing in a single-digit VGPR.
    145 define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    146 entry:
    147   %ptr = getelementptr i32, i32* %out, i64 %index ; &out[index]
    148   %gep = getelementptr i32, i32* %ptr, i32 4 ; a further 4 elements (16 bytes)
    149   %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
    150   store i32 %val, i32* %out2 ; keeps the atomic's result live
    151   ret void
    152 }
    153 
    154 ; GCN-LABEL: {{^}}atomic_and_i32:
    155 ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    156 define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
    157 body:
    158   %old = atomicrmw volatile and i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    159   ret void
    160 }
    161 
    162 ; GCN-LABEL: {{^}}atomic_and_i32_ret:
    163 ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    164 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    165 define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
    166 body:
    167   %old = atomicrmw volatile and i32* %out, i32 %in seq_cst ; directly on %out
    168   store i32 %old, i32* %out2 ; keeps the atomic's result live
    169   ret void
    170 }
    171 
    172 ; GCN-LABEL: {{^}}atomic_and_i32_addr64:
    173 ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    174 define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
    175 body:
    176   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    177   %old = atomicrmw volatile and i32* %elem, i32 %in seq_cst ; result is not used
    178   ret void
    179 }
    180 
    181 ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
    182 ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    183 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    184 define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    185 body:
    186   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    187   %old = atomicrmw volatile and i32* %elem, i32 %in seq_cst
    188   store i32 %old, i32* %out2 ; keeps the atomic's result live
    189   ret void
    190 }
    191 
    192 ; GCN-LABEL: {{^}}atomic_sub_i32_offset:
    193 ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    194 ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    195 define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
    196 body:
    197   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    198   %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst ; result is not used
    199   ret void
    200 }
    201 
    202 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
    203 ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    204 ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    205 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    206 define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    207 body:
    208   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    209   %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
    210   store i32 %old, i32* %out2 ; keeps the atomic's result live
    211   ret void
    212 }
    213 
    214 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
    215 ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    216 ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    217 define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    218 body:
    219   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    220   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    221   %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst ; result is not used
    222   ret void
    223 }
    224 
    225 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
    226 ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    227 ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    228 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    229 define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    230 body:
    231   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    232   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    233   %old = atomicrmw volatile sub i32* %addr, i32 %in seq_cst
    234   store i32 %old, i32* %out2 ; keeps the atomic's result live
    235   ret void
    236 }
    237 
    238 ; GCN-LABEL: {{^}}atomic_sub_i32:
    239 ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    240 define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
    241 body:
    242   %old = atomicrmw volatile sub i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    243   ret void
    244 }
    245 
    246 ; GCN-LABEL: {{^}}atomic_sub_i32_ret:
    247 ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    248 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    249 define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
    250 body:
    251   %old = atomicrmw volatile sub i32* %out, i32 %in seq_cst ; directly on %out
    252   store i32 %old, i32* %out2 ; keeps the atomic's result live
    253   ret void
    254 }
    255 
    256 ; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
    257 ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    258 define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
    259 body:
    260   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    261   %old = atomicrmw volatile sub i32* %elem, i32 %in seq_cst ; result is not used
    262   ret void
    263 }
    264 
    265 ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
    266 ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    267 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    268 define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    269 body:
    270   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    271   %old = atomicrmw volatile sub i32* %elem, i32 %in seq_cst
    272   store i32 %old, i32* %out2 ; keeps the atomic's result live
    273   ret void
    274 }
    275 
    276 ; GCN-LABEL: {{^}}atomic_max_i32_offset:
    277 ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    278 ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    279 define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
    280 body:
    281   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    282   %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst ; checked as the smax form; result is not used
    283   ret void
    284 }
    285 
    286 ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
    287 ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    288 ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    289 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    290 define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    291 body:
    292   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    293   %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst ; checked as the smax form
    294   store i32 %old, i32* %out2 ; keeps the atomic's result live
    295   ret void
    296 }
    297 
    298 ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
    299 ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    300 ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    301 define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    302 body:
    303   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    304   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    305   %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst ; checked as the smax form; result is not used
    306   ret void
    307 }
    308 
    309 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
    310 ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    311 ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    312 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    313 define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    314 body:
    315   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    316   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    317   %old = atomicrmw volatile max i32* %addr, i32 %in seq_cst ; checked as the smax form
    318   store i32 %old, i32* %out2 ; keeps the atomic's result live
    319   ret void
    320 }
    321 
    322 ; GCN-LABEL: {{^}}atomic_max_i32:
    323 ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    324 define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
    325 body:
    326   %old = atomicrmw volatile max i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    327   ret void
    328 }
    329 
    330 ; GCN-LABEL: {{^}}atomic_max_i32_ret:
    331 ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    332 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    333 define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
    334 body:
    335   %old = atomicrmw volatile max i32* %out, i32 %in seq_cst ; directly on %out
    336   store i32 %old, i32* %out2 ; keeps the atomic's result live
    337   ret void
    338 }
    339 
    340 ; GCN-LABEL: {{^}}atomic_max_i32_addr64:
    341 ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    342 define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
    343 body:
    344   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    345   %old = atomicrmw volatile max i32* %elem, i32 %in seq_cst ; result is not used
    346   ret void
    347 }
    348 
    349 ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
    350 ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    351 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    352 define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    353 body:
    354   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    355   %old = atomicrmw volatile max i32* %elem, i32 %in seq_cst
    356   store i32 %old, i32* %out2 ; keeps the atomic's result live
    357   ret void
    358 }
    359 
    360 ; GCN-LABEL: {{^}}atomic_umax_i32_offset:
    361 ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    362 ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    363 define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
    364 body:
    365   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    366   %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst ; result is not used
    367   ret void
    368 }
    369 
    370 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
    371 ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    372 ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    373 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    374 define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    375 body:
    376   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    377   %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
    378   store i32 %old, i32* %out2 ; keeps the atomic's result live
    379   ret void
    380 }
    381 
    382 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
    383 ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    384 ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    385 define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    386 body:
    387   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    388   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    389   %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst ; result is not used
    390   ret void
    391 }
    392 
    393 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
    394 ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    395 ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    396 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    397 define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    398 body:
    399   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    400   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    401   %old = atomicrmw volatile umax i32* %addr, i32 %in seq_cst
    402   store i32 %old, i32* %out2 ; keeps the atomic's result live
    403   ret void
    404 }
    405 
    406 ; GCN-LABEL: {{^}}atomic_umax_i32:
    407 ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    408 define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
    409 body:
    410   %old = atomicrmw volatile umax i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    411   ret void
    412 }
    413 
    414 ; GCN-LABEL: {{^}}atomic_umax_i32_ret:
    415 ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    416 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    417 define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
    418 body:
    419   %old = atomicrmw volatile umax i32* %out, i32 %in seq_cst ; directly on %out
    420   store i32 %old, i32* %out2 ; keeps the atomic's result live
    421   ret void
    422 }
    423 
    424 ; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
    425 ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    426 define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
    427 body:
    428   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    429   %old = atomicrmw volatile umax i32* %elem, i32 %in seq_cst ; result is not used
    430   ret void
    431 }
    432 
    433 ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
    434 ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    435 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    436 define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    437 body:
    438   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    439   %old = atomicrmw volatile umax i32* %elem, i32 %in seq_cst
    440   store i32 %old, i32* %out2 ; keeps the atomic's result live
    441   ret void
    442 }
    443 
    444 ; GCN-LABEL: {{^}}atomic_min_i32_offset:
    445 ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    446 ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    447 define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
    448 body:
    449   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    450   %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst ; checked as the smin form; result is not used
    451   ret void
    452 }
    453 
    454 ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
    455 ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    456 ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    457 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    458 define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    459 body:
    460   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    461   %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst ; checked as the smin form
    462   store i32 %old, i32* %out2 ; keeps the atomic's result live
    463   ret void
    464 }
    465 
    466 ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
    467 ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    468 ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    469 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    470 body:
    471   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    472   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    473   %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst ; checked as the smin form; result is not used
    474   ret void
    475 }
    476 
    477 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
    478 ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    479 ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    480 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    481 define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    482 body:
    483   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    484   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    485   %old = atomicrmw volatile min i32* %addr, i32 %in seq_cst ; checked as the smin form
    486   store i32 %old, i32* %out2 ; keeps the atomic's result live
    487   ret void
    488 }
    489 
    490 ; GCN-LABEL: {{^}}atomic_min_i32:
    491 ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    492 define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
    493 body:
    494   %old = atomicrmw volatile min i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    495   ret void
    496 }
    497 
    498 ; GCN-LABEL: {{^}}atomic_min_i32_ret:
    499 ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    500 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    501 define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
    502 body:
    503   %old = atomicrmw volatile min i32* %out, i32 %in seq_cst ; directly on %out
    504   store i32 %old, i32* %out2 ; keeps the atomic's result live
    505   ret void
    506 }
    507 
    508 ; GCN-LABEL: {{^}}atomic_min_i32_addr64:
    509 ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    510 define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
    511 body:
    512   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    513   %old = atomicrmw volatile min i32* %elem, i32 %in seq_cst ; result is not used
    514   ret void
    515 }
    516 
    517 ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
    518 ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    519 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    520 define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    521 body:
    522   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    523   %old = atomicrmw volatile min i32* %elem, i32 %in seq_cst
    524   store i32 %old, i32* %out2 ; keeps the atomic's result live
    525   ret void
    526 }
    527 
    528 ; GCN-LABEL: {{^}}atomic_umin_i32_offset:
    529 ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    530 ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    531 define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
    532 body:
    533   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    534   %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst ; result is not used
    535   ret void
    536 }
    537 
    538 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
    539 ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    540 ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    541 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    542 define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    543 body:
    544   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    545   %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
    546   store i32 %old, i32* %out2 ; keeps the atomic's result live
    547   ret void
    548 }
    549 
    550 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
    551 ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    552 ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    553 define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    554 body:
    555   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    556   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    557   %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst ; result is not used
    558   ret void
    559 }
    560 
    561 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
    562 ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    563 ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    564 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    565 define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    566 body:
    567   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    568   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    569   %old = atomicrmw volatile umin i32* %addr, i32 %in seq_cst
    570   store i32 %old, i32* %out2 ; keeps the atomic's result live
    571   ret void
    572 }
    573 
    574 ; GCN-LABEL: {{^}}atomic_umin_i32:
    575 ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    576 define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
    577 body:
    578   %old = atomicrmw volatile umin i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    579   ret void
    580 }
    581 
    582 ; GCN-LABEL: {{^}}atomic_umin_i32_ret:
    583 ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    584 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    585 define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
    586 body:
    587   %old = atomicrmw volatile umin i32* %out, i32 %in seq_cst ; directly on %out
    588   store i32 %old, i32* %out2 ; keeps the atomic's result live
    589   ret void
    590 }
    591 
    592 ; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
    593 ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    594 define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
    595 body:
    596   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    597   %old = atomicrmw volatile umin i32* %elem, i32 %in seq_cst ; result is not used
    598   ret void
    599 }
    600 
    601 ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
    602 ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    603 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
        ; Returning atomic 'umin' on &out[index]; the old value is stored to %out2.
        ; The end-of-line anchor on the store check stops RET from matching only a
        ; prefix of a longer register name.
    604 define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    605 entry:
    606   %ptr = getelementptr i32, i32* %out, i64 %index ; &out[index]
    607   %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
    608   store i32 %val, i32* %out2 ; keeps the atomic's result live
    609   ret void
    610 }
    611 
    612 ; GCN-LABEL: {{^}}atomic_or_i32_offset:
    613 ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    614 ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    615 define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
    616 body:
    617   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    618   %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst ; result is not used
    619   ret void
    620 }
    621 
    622 ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
    623 ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    624 ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    625 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    626 define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    627 body:
    628   %addr = getelementptr i32, i32* %out, i32 4 ; &out[4], i.e. 16 bytes past %out
    629   %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
    630   store i32 %old, i32* %out2 ; keeps the atomic's result live
    631   ret void
    632 }
    633 
    634 ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
    635 ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    636 ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
    637 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    638 body:
    639   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    640   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    641   %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst ; result is not used
    642   ret void
    643 }
    644 
    645 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
    646 ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    647 ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    648 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    649 define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    650 body:
    651   %elem = getelementptr i32, i32* %out, i64 %index ; &out[index]
    652   %addr = getelementptr i32, i32* %elem, i32 4 ; a further 4 elements (16 bytes)
    653   %old = atomicrmw volatile or i32* %addr, i32 %in seq_cst
    654   store i32 %old, i32* %out2 ; keeps the atomic's result live
    655   ret void
    656 }
    657 
    658 ; GCN-LABEL: {{^}}atomic_or_i32:
    659 ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    660 define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
    661 body:
    662   %old = atomicrmw volatile or i32* %out, i32 %in seq_cst ; directly on %out; result is not used
    663   ret void
    664 }
    665 
    666 ; GCN-LABEL: {{^}}atomic_or_i32_ret:
    667 ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    668 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
    669 define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
    670 body:
    671   %old = atomicrmw volatile or i32* %out, i32 %in seq_cst ; directly on %out
    672   store i32 %old, i32* %out2 ; keeps the atomic's result live
    673   ret void
    674 }
    675 
    676 ; GCN-LABEL: {{^}}atomic_or_i32_addr64:
    677 ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
        ; Atomic OR at %out[%index] (runtime index, no constant offset) with the
        ; result unused. The shared pattern requires the line to end after the
        ; data register on every run.
    678 define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
    679 entry:
    680   %ptr = getelementptr i32, i32* %out, i64 %index
    681   %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
    682   ret void
    683 }
    684 
    685 ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
    686 ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    687 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic OR at %out[%index] whose old value is stored to %out2. All runs
        ; share one pattern: glc form, no offset, and the RET register must be the
        ; value operand of the following flat_store_dword.
    688 define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    689 entry:
    690   %ptr = getelementptr i32, i32* %out, i64 %index
    691   %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
    692   store i32 %val, i32* %out2  ; keeps the atomic's result live
    693   ret void
    694 }
    695 
    696 ; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
    697 ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    698 ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
        ; Atomic exchange at %out[4] with the result unused; the IR xchg is
        ; expected to appear as flat_atomic_swap. The gfx900 pattern requires
        ; offset:16, the bonaire/tonga pattern requires no trailing operand.
    699 define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
    700 entry:
    701   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    702   %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
    703   ret void
    704 }
    705 
    706 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
    707 ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    708 ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    709 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic exchange at %out[4] whose old value is stored to %out2. The
        ; patterns require the glc form and capture its destination as RET for the
        ; store check; only the gfx900 pattern expects offset:16 before glc.
    710 define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    711 entry:
    712   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    713   %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
    714   store i32 %val, i32* %out2  ; keeps the atomic's result live
    715   ret void
    716 }
    717 
    718 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
    719 ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    720 ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
        ; Atomic exchange at %out[%index + 4] with the result unused. The gfx900
        ; pattern requires offset:16 on flat_atomic_swap; the bonaire/tonga
        ; pattern requires the line to end after the data register.
    721 define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    722 entry:
    723   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    724   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    725   %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
    726   ret void
    727 }
    728 
    729 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
    730 ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    731 ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    732 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic exchange at %out[%index + 4] whose old value is stored to %out2.
        ; The glc form is required on every run and its destination (RET) must
        ; feed the following store; only the gfx900 pattern expects offset:16.
    733 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    734 entry:
    735   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    736   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    737   %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
    738   store i32 %val, i32* %out2  ; keeps the atomic's result live
    739   ret void
    740 }
    741 
    742 ; GCN-LABEL: {{^}}atomic_xchg_i32:
    743 ; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
        ; Atomic exchange directly on %out with the result unused. One pattern is
        ; shared by all runs and is anchored at end of line, so no offset or glc
        ; may follow the data register.
    744 define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
    745 entry:
    746   %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
    747   ret void
    748 }
    749 
    750 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
    751 ; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
    752 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic exchange directly on %out whose old value is stored to %out2.
        ; Every run must emit the glc form, and the destination register captured
        ; as RET must be the value written by the following flat_store_dword.
    753 define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
    754 entry:
    755   %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
    756   store i32 %val, i32* %out2  ; keeps the atomic's result live
    757   ret void
    758 }
    759 
    760 ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
    761 ; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
        ; Atomic exchange at %out[%index] (runtime index, no constant offset) with
        ; the result unused. The shared pattern requires the line to end after the
        ; data register on every run.
    762 define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
    763 entry:
    764   %ptr = getelementptr i32, i32* %out, i64 %index
    765   %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
    766   ret void
    767 }
    768 
    769 ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
    770 ; GCN: flat_atomic_swap [[RET:v[0-9]+]],  v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    771 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic exchange at %out[%index] whose old value is stored to %out2. All
        ; runs share one pattern: glc form, no offset, and RET reused by the store.
        ; NOTE(review): the swap pattern has two spaces after the first comma,
        ; unlike its siblings; FileCheck's default whitespace canonicalization
        ; should make this harmless, but confirm no strict-whitespace mode is used.
    772 define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    773 entry:
    774   %ptr = getelementptr i32, i32* %out, i64 %index
    775   %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
    776   store i32 %val, i32* %out2  ; keeps the atomic's result live
    777   ret void
    778 }
    779 
    780 ; CMP_SWAP
    781 
    782 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
    783 ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    784 ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
        ; Compare-and-swap at %out[4] (expected %old, replacement %in) with the
        ; result unused. Both the address and the data operand are matched as
        ; register ranges. The gfx900 pattern requires offset:16; the
        ; bonaire/tonga pattern requires nothing after the data range.
    785 define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
    786 entry:
    787   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    788   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
    789   ret void
    790 }
    791 
    792 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
    793 ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    794 ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
    795 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
        ; Compare-and-swap at %out[4] whose loaded value is stored to %out2. Here
        ; RET captures only the register number of the destination, and the store
        ; check re-adds the v prefix. The glc form is required on every run; only
        ; the gfx900 pattern expects offset:16.
    796 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
    797 entry:
    798   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    799   %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
    800   %flag = extractvalue { i32, i1 } %val, 0  ; field 0 is the i32 loaded value, not the i1 success bit, despite the name
    801   store i32 %flag, i32* %out2
    802   ret void
    803 }
    804 
    805 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
    806 ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    807 ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
        ; Compare-and-swap at %out[%index + 4] (expected %old, replacement %in)
        ; with the result unused. The gfx900 pattern requires offset:16; the
        ; bonaire/tonga pattern requires the line to end after the data range.
    808 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
    809 entry:
    810   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    811   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    812   %val  = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
    813   ret void
    814 }
    815 
    816 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
    817 ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
    818 ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
    819 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
        ; Compare-and-swap at %out[%index + 4] whose loaded value is stored to
        ; %out2. RET captures the destination register number and must reappear as
        ; the store's value operand. The glc form is required on every run; only
        ; the gfx900 pattern expects offset:16.
    820 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
    821 entry:
    822   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    823   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    824   %val  = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
    825   %flag = extractvalue { i32, i1 } %val, 0  ; field 0 is the i32 loaded value, not the i1 success bit, despite the name
    826   store i32 %flag, i32* %out2
    827   ret void
    828 }
    829 
    830 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
    831 ; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
        ; Compare-and-swap directly on %out (expected %old, replacement %in) with
        ; the result unused. One end-anchored pattern is shared by all runs, so no
        ; offset or glc may follow the data register range.
    832 define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
    833 entry:
    834   %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
    835   ret void
    836 }
    837 
    838 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
    839 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    840 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
        ; Compare-and-swap directly on %out whose loaded value is stored to %out2.
        ; The cmpswap pattern is anchored at end of line, like the other returning
        ; cmpxchg tests in this file, so an unexpected operand after glc fails the
        ; test instead of being silently accepted. RET captures the destination
        ; register number and must reappear as the store's value operand.
    841 define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
    842 entry:
    843   %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
    844   %flag = extractvalue { i32, i1 } %val, 0  ; field 0 is the i32 loaded value, not the i1 success bit, despite the name
    845   store i32 %flag, i32* %out2
    846   ret void
    847 }
    848 
    849 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
    850 ; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
        ; Compare-and-swap at %out[%index] (runtime index, no constant offset)
        ; with the result unused. The shared pattern requires the line to end
        ; after the data register range on every run.
    851 define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
    852 entry:
    853   %ptr = getelementptr i32, i32* %out, i64 %index
    854   %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
    855   ret void
    856 }
    857 
    858 ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
    859 ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
    860 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
        ; Compare-and-swap at %out[%index] whose loaded value is stored to %out2.
        ; All runs share one pattern: glc form, no offset, and the destination
        ; register number captured as RET must be stored by the next check.
    861 define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
    862 entry:
    863   %ptr = getelementptr i32, i32* %out, i64 %index
    864   %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
    865   %flag = extractvalue { i32, i1 } %val, 0  ; field 0 is the i32 loaded value, not the i1 success bit, despite the name
    866   store i32 %flag, i32* %out2
    867   ret void
    868 }
    869 
    870 ; GCN-LABEL: {{^}}atomic_xor_i32_offset:
    871 ; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
    872 ; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
        ; Atomic XOR at %out[4] with the result unused. The gfx900 pattern
        ; requires offset:16 on the instruction; the bonaire/tonga pattern
        ; requires the line to end after the data register.
    873 define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
    874 entry:
    875   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    876   %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
    877   ret void
    878 }
    879 
    880 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
    881 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
    882 ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    883 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic XOR at %out[4] whose old value is stored to %out2. The patterns
        ; require the glc form and capture its destination as RET for the store
        ; check; only the gfx900 pattern expects offset:16 before glc.
    884 define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
    885 entry:
    886   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
    887   %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
    888   store i32 %val, i32* %out2  ; keeps the atomic's result live
    889   ret void
    890 }
    891 
    892 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
    893 ; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
    894 ; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
        ; Atomic XOR at %out[%index + 4] with the result unused. The gfx900
        ; pattern requires offset:16; the bonaire/tonga pattern requires the line
        ; to end after the data register.
    895 define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
    896 entry:
    897   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    898   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    899   %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
    900   ret void
    901 }
    902 
    903 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
    904 ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    905 ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
    906 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic XOR at %out[%index + 4] whose old value is stored to %out2. The
        ; glc form is required on every run and its destination (RET) must feed
        ; the following store; only the gfx900 pattern expects offset:16.
    907 define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
    908 entry:
    909   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
    910   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    911   %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
    912   store i32 %val, i32* %out2  ; keeps the atomic's result live
    913   ret void
    914 }
    915 
    916 ; GCN-LABEL: {{^}}atomic_xor_i32:
    917 ; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
        ; Atomic XOR directly on %out with the result unused. One end-anchored
        ; pattern is shared by all runs, so no offset or glc may follow the data
        ; register.
    918 define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
    919 entry:
    920   %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
    921   ret void
    922 }
    923 
    924 ; GCN-LABEL: {{^}}atomic_xor_i32_ret:
    925 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
    926 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic XOR directly on %out whose old value is stored to %out2. Every
        ; run must emit the glc form, and the destination register captured as
        ; RET must be the value written by the following flat_store_dword.
    927 define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
    928 entry:
    929   %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
    930   store i32 %val, i32* %out2  ; keeps the atomic's result live
    931   ret void
    932 }
    933 
    934 ; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
    935 ; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
        ; Atomic XOR at %out[%index] (runtime index, no constant offset) with the
        ; result unused. The shared pattern requires the line to end after the
        ; data register on every run.
    936 define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
    937 entry:
    938   %ptr = getelementptr i32, i32* %out, i64 %index
    939   %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
    940   ret void
    941 }
    942 
    943 ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
    944 ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
    945 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; Atomic XOR at %out[%index] whose old value is stored to %out2. All runs
        ; share one pattern: glc form, no offset, and the RET register must be the
        ; value operand of the following flat_store_dword.
    946 define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
    947 entry:
    948   %ptr = getelementptr i32, i32* %out, i64 %index
    949   %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
    950   store i32 %val, i32* %out2  ; keeps the atomic's result live
    951   ret void
    952 }
    953 
    954 ; GCN-LABEL: {{^}}atomic_load_i32_offset:
    955 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    956 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
    957 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; seq_cst atomic load from %in[4], copied to %out with a plain store. The
        ; patterns expect a flat_load_dword carrying glc whose destination (RET)
        ; is then stored; only the gfx900 pattern expects offset:16 on the load.
    958 define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
    959 entry:
    960   %gep = getelementptr i32, i32* %in, i32 4  ; 4 x i32 = 16 bytes past %in
    961   %val = load atomic i32, i32* %gep  seq_cst, align 4
    962   store i32 %val, i32* %out
    963   ret void
    964 }
    965 
    966 ; GCN-LABEL: {{^}}atomic_load_i32:
    967 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    968 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; seq_cst atomic load directly from %in, copied to %out with a plain
        ; store. The load pattern is anchored at end of line, like the other
        ; atomic-load tests in this file, so an unexpected operand after glc fails
        ; the test. The destination captured as RET must be the stored value.
    969 define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
    970 entry:
    971   %val = load atomic i32, i32* %in seq_cst, align 4
    972   store i32 %val, i32* %out
    973   ret void
    974 }
    975 
    976 ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
    977 ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
    978 ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
    979 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; seq_cst atomic load from %in[%index + 4], copied to %out. The patterns
        ; expect a glc flat_load_dword whose destination (RET) is then stored;
        ; only the gfx900 pattern expects offset:16 on the load.
    980 define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
    981 entry:
    982   %ptr = getelementptr i32, i32* %in, i64 %index  ; runtime-indexed base
    983   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
    984   %val = load atomic i32, i32* %gep seq_cst, align 4
    985   store i32 %val, i32* %out
    986   ret void
    987 }
    988 
    989 ; GCN-LABEL: {{^}}atomic_load_i32_addr64:
    990 ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
    991 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ; seq_cst atomic load from %in[%index] (runtime index, no constant
        ; offset), copied to %out. All runs share one pattern: a glc load with no
        ; offset, followed by a store of the captured RET register.
    992 define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
    993 entry:
    994   %ptr = getelementptr i32, i32* %in, i64 %index
    995   %val = load atomic i32, i32* %ptr seq_cst, align 4
    996   store i32 %val, i32* %out
    997   ret void
    998 }
    999 
   1000 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
   1001 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
   1002 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
        ; seq_cst atomic store of %in to %out[4]. Note the value parameter comes
        ; first in this kernel's signature. The gfx900 pattern requires offset:16
        ; on flat_store_dword; the bonaire/tonga pattern requires the line to end
        ; after the data register.
   1003 define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
   1004 entry:
   1005   %gep = getelementptr i32, i32* %out, i32 4  ; 4 x i32 = 16 bytes past %out
   1006   store atomic i32 %in, i32* %gep  seq_cst, align 4
   1007   ret void
   1008 }
   1009 
   1010 ; GCN-LABEL: {{^}}atomic_store_i32:
   1011 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
        ; seq_cst atomic store of %in directly to %out. One end-anchored pattern
        ; is shared by all runs, so nothing may follow the data register of the
        ; flat_store_dword.
   1012 define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
   1013 entry:
   1014   store atomic i32 %in, i32* %out seq_cst, align 4
   1015   ret void
   1016 }
   1017 
   1018 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
   1019 ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
   1020 ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
        ; seq_cst atomic store of %in to %out[%index + 4]. The gfx900 pattern
        ; requires offset:16 on flat_store_dword; the bonaire/tonga pattern
        ; requires the line to end after the data register.
   1021 define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
   1022 entry:
   1023   %ptr = getelementptr i32, i32* %out, i64 %index  ; runtime-indexed base
   1024   %gep = getelementptr i32, i32* %ptr, i32 4  ; 4 x i32 = 16 bytes past %ptr
   1025   store atomic i32 %in, i32* %gep seq_cst, align 4
   1026   ret void
   1027 }
   1028 
   1029 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
   1030 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
        ; seq_cst atomic store of %in to %out[%index] (runtime index, no constant
        ; offset). The shared pattern requires the flat_store_dword line to end
        ; after the data register on every run.
   1031 define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
   1032 entry:
   1033   %ptr = getelementptr i32, i32* %out, i64 %index
   1034   store atomic i32 %in, i32* %ptr seq_cst, align 4
   1035   ret void
   1036 }
   1037