Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s
      4 
      5 ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
      6 ; SICIVI: s_mov_b32 m0
      7 ; GFX9-NOT: m0
      8 
      9 ; GCN: ds_wrxchg_rtn_b64
     10 ; GCN: s_endpgm
     11 define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit xchg on %ptr
     12   %old = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
     13   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     14   ret void
     15 }
     16 
     17 ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
     18 ; SICIVI: s_mov_b32 m0
     19 ; GFX9-NOT: m0
     20 
     21 ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
     22 ; GCN: s_endpgm
     23 define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit xchg at %ptr + 4 elements
     24   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
     25   %old = atomicrmw xchg i64 addrspace(3)* %addr, i64 4 seq_cst
     26   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     27   ret void
     28 }
     29 
     30 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
     31 ; SICIVI: s_mov_b32 m0
     32 ; GFX9-NOT: m0
     33 
     34 ; GCN: ds_add_rtn_u64
     35 ; GCN: s_endpgm
     36 define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit add on %ptr
     37   %old = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
     38   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     39   ret void
     40 }
     41 
     42 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
     43 ; SICIVI-DAG: s_mov_b32 m0
     44 ; GFX9-NOT: m0
     45 
     46 ; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
     47 ; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
     48 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
     49 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
     50 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
     51 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
     52 ; GCN: buffer_store_dwordx2 [[RESULT]],
     53 ; GCN: s_endpgm
     54 define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit add of 9 at %ptr + 4 elements
     55   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i64 4 ; 4 x i64 = 32 bytes past %ptr (index given as i64 here)
     56   %old = atomicrmw add i64 addrspace(3)* %addr, i64 9 seq_cst
     57   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     58   ret void
     59 }
     60 
     61 ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
     62 ; SICIVI-DAG: s_mov_b32 m0
     63 ; GFX9-NOT: m0
     64 
     65 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
     66 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
     67 ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
     68 ; GCN: buffer_store_dwordx2 [[RESULT]],
     69 ; GCN: s_endpgm
     70 define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit add of 1 on %ptr
     71   %old = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
     72   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     73   ret void
     74 }
     75 
     76 ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
     77 ; SICIVI: s_mov_b32 m0
     78 ; GFX9-NOT: m0
     79 
     80 ; GCN: ds_add_rtn_u64 {{.*}} offset:32
     81 ; GCN: s_endpgm
     82 define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit add of 1 at %ptr + 4 elements
     83   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
     84   %old = atomicrmw add i64 addrspace(3)* %addr, i64 1 seq_cst
     85   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     86   ret void
     87 }
     88 
     89 ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
     90 ; SICIVI: s_mov_b32 m0
     91 ; GFX9-NOT: m0
     92 
     93 ; GCN: ds_sub_rtn_u64
     94 ; GCN: s_endpgm
     95 define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit sub on %ptr
     96   %old = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
     97   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
     98   ret void
     99 }
    100 
    101 ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
    102 ; SICIVI: s_mov_b32 m0
    103 ; GFX9-NOT: m0
    104 
    105 ; GCN: ds_sub_rtn_u64 {{.*}} offset:32
    106 ; GCN: s_endpgm
    107 define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit sub at %ptr + 4 elements
    108   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    109   %old = atomicrmw sub i64 addrspace(3)* %addr, i64 4 seq_cst
    110   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    111   ret void
    112 }
    113 
    114 ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
    115 ; SICIVI-DAG: s_mov_b32 m0
    116 ; GFX9-NOT: m0
    117 
    118 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
    119 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
    120 ; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
    121 ; GCN: buffer_store_dwordx2 [[RESULT]],
    122 ; GCN: s_endpgm
    123 define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit sub of 1 on %ptr
    124   %old = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
    125   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    126   ret void
    127 }
    128 
    129 ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
    130 ; SICIVI: s_mov_b32 m0
    131 ; GFX9-NOT: m0
    132 
    133 ; GCN: ds_sub_rtn_u64 {{.*}} offset:32
    134 ; GCN: s_endpgm
    135 define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit sub of 1 at %ptr + 4 elements
    136   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    137   %old = atomicrmw sub i64 addrspace(3)* %addr, i64 1 seq_cst
    138   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    139   ret void
    140 }
    141 
    142 ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
    143 ; SICIVI: s_mov_b32 m0
    144 ; GFX9-NOT: m0
    145 
    146 ; GCN: ds_and_rtn_b64
    147 ; GCN: s_endpgm
    148 define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit and on %ptr
    149   %old = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
    150   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    151   ret void
    152 }
    153 
    154 ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
    155 ; SICIVI: s_mov_b32 m0
    156 ; GFX9-NOT: m0
    157 
    158 ; GCN: ds_and_rtn_b64 {{.*}} offset:32
    159 ; GCN: s_endpgm
    160 define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit and at %ptr + 4 elements
    161   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    162   %old = atomicrmw and i64 addrspace(3)* %addr, i64 4 seq_cst
    163   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    164   ret void
    165 }
    166 
    167 ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
    168 ; SICIVI: s_mov_b32 m0
    169 ; GFX9-NOT: m0
    170 
    171 ; GCN: ds_or_rtn_b64
    172 ; GCN: s_endpgm
    173 define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit or on %ptr
    174   %old = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
    175   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    176   ret void
    177 }
    178 
    179 ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
    180 ; SICIVI: s_mov_b32 m0
    181 ; GFX9-NOT: m0
    182 
    183 ; GCN: ds_or_rtn_b64 {{.*}} offset:32
    184 ; GCN: s_endpgm
    185 define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit or at %ptr + 4 elements
    186   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    187   %old = atomicrmw or i64 addrspace(3)* %addr, i64 4 seq_cst
    188   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    189   ret void
    190 }
    191 
    192 ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
    193 ; SICIVI: s_mov_b32 m0
    194 ; GFX9-NOT: m0
    195 
    196 ; GCN: ds_xor_rtn_b64
    197 ; GCN: s_endpgm
    198 define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit xor on %ptr
    199   %old = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
    200   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    201   ret void
    202 }
    203 
    204 ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
    205 ; SICIVI: s_mov_b32 m0
    206 ; GFX9-NOT: m0
    207 
    208 ; GCN: ds_xor_rtn_b64 {{.*}} offset:32
    209 ; GCN: s_endpgm
    210 define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit xor at %ptr + 4 elements
    211   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    212   %old = atomicrmw xor i64 addrspace(3)* %addr, i64 4 seq_cst
    213   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    214   ret void
    215 }
    216 
    217 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
    218 ; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
    219 ; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
    220 ;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
    221 ;   store i64 %result, i64 addrspace(1)* %out, align 8
    222 ;   ret void
    223 ; }
    224 
    225 ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
    226 ; SICIVI: s_mov_b32 m0
    227 ; GFX9-NOT: m0
    228 
    229 ; GCN: ds_min_rtn_i64
    230 ; GCN: s_endpgm
    231 define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit signed min on %ptr
    232   %old = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
    233   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    234   ret void
    235 }
    236 
    237 ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
    238 ; SICIVI: s_mov_b32 m0
    239 ; GFX9-NOT: m0
    240 
    241 ; GCN: ds_min_rtn_i64 {{.*}} offset:32
    242 ; GCN: s_endpgm
    243 define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit signed min at %ptr + 4 elements
    244   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    245   %old = atomicrmw min i64 addrspace(3)* %addr, i64 4 seq_cst
    246   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    247   ret void
    248 }
    249 
    250 ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
    251 ; SICIVI: s_mov_b32 m0
    252 ; GFX9-NOT: m0
    253 
    254 ; GCN: ds_max_rtn_i64
    255 ; GCN: s_endpgm
    256 define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit signed max on %ptr
    257   %old = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
    258   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    259   ret void
    260 }
    261 
    262 ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
    263 ; SICIVI: s_mov_b32 m0
    264 ; GFX9-NOT: m0
    265 
    266 ; GCN: ds_max_rtn_i64 {{.*}} offset:32
    267 ; GCN: s_endpgm
    268 define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit signed max at %ptr + 4 elements
    269   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    270   %old = atomicrmw max i64 addrspace(3)* %addr, i64 4 seq_cst
    271   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    272   ret void
    273 }
    274 
    275 ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
    276 ; SICIVI: s_mov_b32 m0
    277 ; GFX9-NOT: m0
    278 
    279 ; GCN: ds_min_rtn_u64
    280 ; GCN: s_endpgm
    281 define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit unsigned min on %ptr
    282   %old = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
    283   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    284   ret void
    285 }
    286 
    287 ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
    288 ; SICIVI: s_mov_b32 m0
    289 ; GFX9-NOT: m0
    290 
    291 ; GCN: ds_min_rtn_u64 {{.*}} offset:32
    292 ; GCN: s_endpgm
    293 define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit unsigned min at %ptr + 4 elements
    294   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    295   %old = atomicrmw umin i64 addrspace(3)* %addr, i64 4 seq_cst
    296   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    297   ret void
    298 }
    299 
    300 ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
    301 ; SICIVI: s_mov_b32 m0
    302 ; GFX9-NOT: m0
    303 
    304 ; GCN: ds_max_rtn_u64
    305 ; GCN: s_endpgm
    306 define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit unsigned max on %ptr
    307   %old = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
    308   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    309   ret void
    310 }
    311 
    312 ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
    313 ; SICIVI: s_mov_b32 m0
    314 ; GFX9-NOT: m0
    315 
    316 ; GCN: ds_max_rtn_u64 {{.*}} offset:32
    317 ; GCN: s_endpgm
    318 define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { ; returning 64-bit unsigned max at %ptr + 4 elements
    319   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    320   %old = atomicrmw umax i64 addrspace(3)* %addr, i64 4 seq_cst
    321   store i64 %old, i64 addrspace(1)* %out, align 8 ; write the value produced by the atomic to %out
    322   ret void
    323 }
    324 
    325 ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
    326 ; SICIVI: s_mov_b32 m0
    327 ; GFX9-NOT: m0
    328 
    329 ; GCN: ds_wrxchg_rtn_b64
    330 ; GCN: s_endpgm
    331 define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit xchg on %ptr, result unused
    332   %unused = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    333   ret void
    334 }
    335 
    336 ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
    337 ; SICIVI: s_mov_b32 m0
    338 ; GFX9-NOT: m0
    339 
    340 ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
    341 ; GCN: s_endpgm
    342 define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit xchg at %ptr + 4 elements, result unused
    343   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    344   %unused = atomicrmw xchg i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    345   ret void
    346 }
    347 
    348 ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
    349 ; SICIVI: s_mov_b32 m0
    350 ; GFX9-NOT: m0
    351 
    352 ; GCN: ds_add_u64
    353 ; GCN: s_endpgm
    354 define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit add on %ptr, result unused
    355   %unused = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    356   ret void
    357 }
    358 
    359 ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
    360 ; SICIVI-DAG: s_mov_b32 m0
    361 ; GFX9-NOT: m0
    362 
    363 ; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
    364 ; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
    365 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
    366 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
    367 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
    368 ; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
    369 ; GCN: s_endpgm
    370 define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit add of 9 at %ptr + 4 elements, result unused
    371   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4 ; 4 x i64 = 32 bytes past %ptr; expected to fold into the DS offset field
    372   %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst ; %result has no users, so the non-returning DS form is expected
    373   ret void
    374 }
    375 
    376 ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
    377 ; SICIVI-DAG: s_mov_b32 m0
    378 ; GFX9-NOT: m0
    379 
    380 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
    381 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
    382 ; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
    383 ; GCN: s_endpgm
    384 define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit add of 1 on %ptr, result unused
    385   %unused = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst ; no later instruction reads this value
    386   ret void
    387 }
    388 
    389 ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
    390 ; SICIVI: s_mov_b32 m0
    391 ; GFX9-NOT: m0
    392 
    393 ; GCN: ds_add_u64 {{.*}} offset:32
    394 ; GCN: s_endpgm
    395 define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit add of 1 at %ptr + 4 elements, result unused
    396   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    397   %unused = atomicrmw add i64 addrspace(3)* %addr, i64 1 seq_cst ; no later instruction reads this value
    398   ret void
    399 }
    400 
    401 ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
    402 ; SICIVI: s_mov_b32 m0
    403 ; GFX9-NOT: m0
    404 
    405 ; GCN: ds_sub_u64
    406 ; GCN: s_endpgm
    407 define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit sub on %ptr, result unused
    408   %unused = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    409   ret void
    410 }
    411 
    412 ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
    413 ; SICIVI: s_mov_b32 m0
    414 ; GFX9-NOT: m0
    415 
    416 ; GCN: ds_sub_u64 {{.*}} offset:32
    417 ; GCN: s_endpgm
    418 define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit sub at %ptr + 4 elements, result unused
    419   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    420   %unused = atomicrmw sub i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    421   ret void
    422 }
    423 
    424 ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
    425 ; SICIVI-DAG: s_mov_b32 m0
    426 ; GFX9-NOT: m0
    427 
    428 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
    429 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
    430 ; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
    431 ; GCN: s_endpgm
    432 define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit sub of 1 on %ptr, result unused
    433   %unused = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst ; no later instruction reads this value
    434   ret void
    435 }
    436 
    437 ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
    438 ; SICIVI: s_mov_b32 m0
    439 ; GFX9-NOT: m0
    440 
    441 ; GCN: ds_sub_u64 {{.*}} offset:32
    442 ; GCN: s_endpgm
    443 define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit sub of 1 at %ptr + 4 elements, result unused
    444   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    445   %unused = atomicrmw sub i64 addrspace(3)* %addr, i64 1 seq_cst ; no later instruction reads this value
    446   ret void
    447 }
    448 
    449 ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
    450 ; SICIVI: s_mov_b32 m0
    451 ; GFX9-NOT: m0
    452 
    453 ; GCN: ds_and_b64
    454 ; GCN: s_endpgm
    455 define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit and on %ptr, result unused
    456   %unused = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    457   ret void
    458 }
    459 
    460 ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
    461 ; SICIVI: s_mov_b32 m0
    462 ; GFX9-NOT: m0
    463 
    464 ; GCN: ds_and_b64 {{.*}} offset:32
    465 ; GCN: s_endpgm
    466 define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit and at %ptr + 4 elements, result unused
    467   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    468   %unused = atomicrmw and i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    469   ret void
    470 }
    471 
    472 ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
    473 ; SICIVI: s_mov_b32 m0
    474 ; GFX9-NOT: m0
    475 
    476 ; GCN: ds_or_b64
    477 ; GCN: s_endpgm
    478 define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit or on %ptr, result unused
    479   %unused = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    480   ret void
    481 }
    482 
    483 ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
    484 ; SICIVI: s_mov_b32 m0
    485 ; GFX9-NOT: m0
    486 
    487 ; GCN: ds_or_b64 {{.*}} offset:32
    488 ; GCN: s_endpgm
    489 define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit or at %ptr + 4 elements, result unused
    490   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    491   %unused = atomicrmw or i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    492   ret void
    493 }
    494 
    495 ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
    496 ; SICIVI: s_mov_b32 m0
    497 ; GFX9-NOT: m0
    498 
    499 ; GCN: ds_xor_b64
    500 ; GCN: s_endpgm
    501 define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit xor on %ptr, result unused
    502   %unused = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    503   ret void
    504 }
    505 
    506 ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
    507 ; SICIVI: s_mov_b32 m0
    508 ; GFX9-NOT: m0
    509 
    510 ; GCN: ds_xor_b64 {{.*}} offset:32
    511 ; GCN: s_endpgm
    512 define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit xor at %ptr + 4 elements, result unused
    513   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    514   %unused = atomicrmw xor i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    515   ret void
    516 }
    517 
    518 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
    519 ; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
    520 ; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
    521 ;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
    522 ;   ret void
    523 ; }
    524 
    525 ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
    526 ; SICIVI: s_mov_b32 m0
    527 ; GFX9-NOT: m0
    528 
    529 ; GCN: ds_min_i64
    530 ; GCN: s_endpgm
    531 define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit signed min on %ptr, result unused
    532   %unused = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    533   ret void
    534 }
    535 
    536 ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
    537 ; SICIVI: s_mov_b32 m0
    538 ; GFX9-NOT: m0
    539 
    540 ; GCN: ds_min_i64 {{.*}} offset:32
    541 ; GCN: s_endpgm
    542 define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit signed min at %ptr + 4 elements, result unused
    543   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    544   %unused = atomicrmw min i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    545   ret void
    546 }
    547 
    548 ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
    549 ; SICIVI: s_mov_b32 m0
    550 ; GFX9-NOT: m0
    551 
    552 ; GCN: ds_max_i64
    553 ; GCN: s_endpgm
    554 define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit signed max on %ptr, result unused
    555   %unused = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    556   ret void
    557 }
    558 
    559 ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
    560 ; SICIVI: s_mov_b32 m0
    561 ; GFX9-NOT: m0
    562 
    563 ; GCN: ds_max_i64 {{.*}} offset:32
    564 ; GCN: s_endpgm
    565 define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit signed max at %ptr + 4 elements, result unused
    566   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    567   %unused = atomicrmw max i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    568   ret void
    569 }
    570 
    571 ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
    572 ; SICIVI: s_mov_b32 m0
    573 ; GFX9-NOT: m0
    574 
    575 ; GCN: ds_min_u64
    576 ; GCN: s_endpgm
    577 define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit unsigned min on %ptr, result unused
    578   %unused = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    579   ret void
    580 }
    581 
    582 ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
    583 ; SICIVI: s_mov_b32 m0
    584 ; GFX9-NOT: m0
    585 
    586 ; GCN: ds_min_u64 {{.*}} offset:32
    587 ; GCN: s_endpgm
    588 define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit unsigned min at %ptr + 4 elements, result unused
    589   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    590   %unused = atomicrmw umin i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    591   ret void
    592 }
    593 
    594 ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
    595 ; SICIVI: s_mov_b32 m0
    596 ; GFX9-NOT: m0
    597 
    598 ; GCN: ds_max_u64
    599 ; GCN: s_endpgm
    600 define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; 64-bit unsigned max on %ptr, result unused
    601   %unused = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst ; no later instruction reads this value
    602   ret void
    603 }
    604 
    605 ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
    606 ; SICIVI: s_mov_b32 m0
    607 ; GFX9-NOT: m0
    608 
    609 ; GCN: ds_max_u64 {{.*}} offset:32
    610 ; GCN: s_endpgm
    611 define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; 64-bit unsigned max at %ptr + 4 elements, result unused
    612   %addr = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 ; 4 x i64 = 32 bytes past %ptr
    613   %unused = atomicrmw umax i64 addrspace(3)* %addr, i64 4 seq_cst ; no later instruction reads this value
    614   ret void
    615 }
    616