Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
      4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
      5 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
      6 
      7 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
      8 ; EG: LDS_WRXCHG_RET *
      9 
     10 ; SICIVI-DAG: s_mov_b32 m0
     11 ; GFX9-NOT: m0
     12 
     13 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
     14 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
     15 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     16 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
     17 ; GCN: buffer_store_dword [[RESULT]],
     18 ; GCN: s_endpgm
     19 define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
     20   %old = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; exchange in 4 and capture the previous contents
     21   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the previous contents through %out
     22   ret void
     23 }
     24 
     25 ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
     26 ; SICIVI: s_mov_b32 m0
     27 ; GFX9-NOT: m0
     28 
     29 ; EG: LDS_WRXCHG_RET *
     30 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
     31 ; GCN: s_endpgm
     32 define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
     33   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
     34   %old = atomicrmw xchg i32 addrspace(3)* %slot, i32 4 seq_cst ; exchange in 4 and capture the previous contents
     35   store i32 %old, i32 addrspace(1)* %out, align 4
     36   ret void
     37 }
     38 
     39 ; XXX - Is it really necessary to load 4 into VGPR?
     40 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
     41 ; EG: LDS_ADD_RET *
     42 
     43 ; SICIVI-DAG: s_mov_b32 m0
     44 ; GFX9-NOT: m0
     45 
     46 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
     47 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
     48 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     49 ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
     50 ; GCN: buffer_store_dword [[RESULT]],
     51 ; GCN: s_endpgm
     52 define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
     53   %old = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add 4 and capture the pre-add value
     54   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the pre-add value through %out
     55   ret void
     56 }
     57 
     58 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
     59 ; SICIVI: s_mov_b32 m0
     60 ; GFX9-NOT: m0
     61 
     62 ; EG: LDS_ADD_RET *
     63 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
     64 ; GCN: s_endpgm
     65 define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
     66   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
     67   %old = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 and capture the pre-add value
     68   store i32 %old, i32 addrspace(1)* %out, align 4
     69   ret void
     70 }
     71 
     72 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
     73 ; SICIVI: s_mov_b32 m0
     74 ; GFX9-NOT: m0
     75 
     76 ; EG: LDS_ADD_RET *
     77 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     78 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
     79 ; GCN: s_endpgm
     80 define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
     81   %delta = sub i32 %a, %b ; runtime part of the element index
     82   %index = add i32 %delta, 4 ; plus a constant 4 elements (16 bytes)
     83   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %index
     84   %old = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4 and capture the pre-add value
     85   store i32 %old, i32 addrspace(1)* %out, align 4
     86   ret void
     87 }
     88 
     89 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
     90 ; EG: LDS_ADD_RET *
     91 
     92 ; SICIVI-DAG: s_mov_b32 m0
     93 ; GFX9-NOT: m0
     94 
     95 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
     96 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
     97 ; GCN: s_endpgm
     98 define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
     99   %old = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; add 1 and capture the pre-add value
    100   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the pre-add value through %out
    101   ret void
    102 }
    103 
    104 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
    105 ; EG: LDS_ADD_RET *
    106 
    107 ; SICIVI-DAG: s_mov_b32 m0
    108 ; GFX9-NOT: m0
    109 
    110 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    111 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
    112 ; GCN: s_endpgm
    113 define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    114   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    115   %old = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 and capture the pre-add value
    116   store i32 %old, i32 addrspace(1)* %out, align 4
    117   ret void
    118 }
    119 
    120 ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
    121 ; SICIVI: s_mov_b32 m0
    122 ; GFX9-NOT: m0
    123 
    124 ; EG: LDS_ADD_RET *
    125 ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    126 ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    127 ; GCN: s_endpgm
    128 define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
    129   %delta = sub i32 %a, %b ; runtime part of the element index
    130   %index = add i32 %delta, 4 ; plus a constant 4 elements (16 bytes)
    131   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %index
    132   %old = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1 and capture the pre-add value
    133   store i32 %old, i32 addrspace(1)* %out, align 4
    134   ret void
    135 }
    136 
    137 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
    138 ; EG: LDS_SUB_RET *
    139 
    140 ; SICIVI: s_mov_b32 m0
    141 ; GFX9-NOT: m0
    142 
    143 ; GCN: ds_sub_rtn_u32
    144 ; GCN: s_endpgm
    145 define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    146   %old = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract 4 and capture the prior value
    147   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    148   ret void
    149 }
    150 
    151 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
    152 ; EG: LDS_SUB_RET *
    153 
    154 ; SICIVI: s_mov_b32 m0
    155 ; GFX9-NOT: m0
    156 
    157 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    158 ; GCN: s_endpgm
    159 define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    160   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    161   %old = atomicrmw sub i32 addrspace(3)* %slot, i32 4 seq_cst ; subtract 4 and capture the prior value
    162   store i32 %old, i32 addrspace(1)* %out, align 4
    163   ret void
    164 }
    165 
    166 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
    167 ; EG: LDS_SUB_RET *
    168 
    169 ; SICIVI-DAG: s_mov_b32 m0
    170 ; GFX9-NOT: m0
    171 
    172 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    173 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
    174 ; GCN: s_endpgm
    175 define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    176   %old = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; subtract 1 and capture the prior value
    177   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    178   ret void
    179 }
    180 
    181 ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
    182 ; EG: LDS_SUB_RET *
    183 
    184 ; SICIVI-DAG: s_mov_b32 m0
    185 ; GFX9-NOT: m0
    186 
    187 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    188 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
    189 ; GCN: s_endpgm
    190 define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    191   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    192   %old = atomicrmw sub i32 addrspace(3)* %slot, i32 1 seq_cst ; subtract 1 and capture the prior value
    193   store i32 %old, i32 addrspace(1)* %out, align 4
    194   ret void
    195 }
    196 
    197 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
    198 ; EG: LDS_AND_RET *
    199 
    200 ; SICIVI-DAG: s_mov_b32 m0
    201 ; GFX9-NOT: m0
    202 
    203 ; GCN: ds_and_rtn_b32
    204 ; GCN: s_endpgm
    205 define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    206   %old = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-and with 4 and capture the prior value
    207   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    208   ret void
    209 }
    210 
    211 ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
    212 ; SICIVI: s_mov_b32 m0
    213 ; GFX9-NOT: m0
    214 
    215 ; EG: LDS_AND_RET *
    216 ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    217 ; GCN: s_endpgm
    218 define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    219   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    220   %old = atomicrmw and i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-and with 4 and capture the prior value
    221   store i32 %old, i32 addrspace(1)* %out, align 4
    222   ret void
    223 }
    224 
    225 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
    226 ; SICIVI: s_mov_b32 m0
    227 ; GFX9-NOT: m0
    228 
    229 ; EG: LDS_OR_RET *
    230 ; GCN: ds_or_rtn_b32
    231 ; GCN: s_endpgm
    232 define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    233   %old = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-or with 4 and capture the prior value
    234   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    235   ret void
    236 }
    237 
    238 ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
    239 ; SICIVI: s_mov_b32 m0
    240 ; GFX9-NOT: m0
    241 
    242 ; EG: LDS_OR_RET *
    243 ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    244 ; GCN: s_endpgm
    245 define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    246   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    247   %old = atomicrmw or i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-or with 4 and capture the prior value
    248   store i32 %old, i32 addrspace(1)* %out, align 4
    249   ret void
    250 }
    251 
    252 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
    253 ; SICIVI: s_mov_b32 m0
    254 ; GFX9-NOT: m0
    255 
    256 ; EG: LDS_XOR_RET *
    257 ; GCN: ds_xor_rtn_b32
    258 ; GCN: s_endpgm
    259 define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    260   %old = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-xor with 4 and capture the prior value
    261   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    262   ret void
    263 }
    264 
    265 ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
    266 ; SICIVI: s_mov_b32 m0
    267 ; GFX9-NOT: m0
    268 
    269 ; EG: LDS_XOR_RET *
    270 ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    271 ; GCN: s_endpgm
    272 define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    273   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    274   %old = atomicrmw xor i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-xor with 4 and capture the prior value
    275   store i32 %old, i32 addrspace(1)* %out, align 4
    276   ret void
    277 }
    278 
    279 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
    280 ; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
    281 ; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    282 ;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
    283 ;   store i32 %result, i32 addrspace(1)* %out, align 4
    284 ;   ret void
    285 ; }
    286 
    287 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
    288 ; SICIVI: s_mov_b32 m0
    289 ; GFX9-NOT: m0
    290 
    291 ; EG: LDS_MIN_INT_RET *
    292 ; GCN: ds_min_rtn_i32
    293 ; GCN: s_endpgm
    294 define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    295   %old = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed minimum with 4, capture the prior value
    296   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    297   ret void
    298 }
    299 
    300 ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
    301 ; SICIVI: s_mov_b32 m0
    302 ; GFX9-NOT: m0
    303 
    304 ; EG: LDS_MIN_INT_RET *
    305 ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    306 ; GCN: s_endpgm
    307 define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    308   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    309   %old = atomicrmw min i32 addrspace(3)* %slot, i32 4 seq_cst ; signed minimum with 4, capture the prior value
    310   store i32 %old, i32 addrspace(1)* %out, align 4
    311   ret void
    312 }
    313 
    314 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
    315 ; SICIVI: s_mov_b32 m0
    316 ; GFX9-NOT: m0
    317 
    318 ; EG: LDS_MAX_INT_RET *
    319 ; GCN: ds_max_rtn_i32
    320 ; GCN: s_endpgm
    321 define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    322   %old = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed maximum with 4, capture the prior value
    323   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    324   ret void
    325 }
    326 
    327 ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
    328 ; SICIVI: s_mov_b32 m0
    329 ; GFX9-NOT: m0
    330 
    331 ; EG: LDS_MAX_INT_RET *
    332 ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    333 ; GCN: s_endpgm
    334 define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    335   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    336   %old = atomicrmw max i32 addrspace(3)* %slot, i32 4 seq_cst ; signed maximum with 4, capture the prior value
    337   store i32 %old, i32 addrspace(1)* %out, align 4
    338   ret void
    339 }
    340 
    341 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
    342 ; SICIVI: s_mov_b32 m0
    343 ; GFX9-NOT: m0
    344 
    345 ; EG: LDS_MIN_UINT_RET *
    346 ; GCN: ds_min_rtn_u32
    347 ; GCN: s_endpgm
    348 define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    349   %old = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned minimum with 4, capture the prior value
    350   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    351   ret void
    352 }
    353 
    354 ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
    355 ; SICIVI: s_mov_b32 m0
    356 ; GFX9-NOT: m0
    357 
    358 ; EG: LDS_MIN_UINT_RET *
    359 ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    360 ; GCN: s_endpgm
    361 define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    362   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    363   %old = atomicrmw umin i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned minimum with 4, capture the prior value
    364   store i32 %old, i32 addrspace(1)* %out, align 4
    365   ret void
    366 }
    367 
    368 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
    369 ; SICIVI: s_mov_b32 m0
    370 ; GFX9-NOT: m0
    371 
    372 ; EG: LDS_MAX_UINT_RET *
    373 ; GCN: ds_max_rtn_u32
    374 ; GCN: s_endpgm
    375 define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    376   %old = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned maximum with 4, capture the prior value
    377   store i32 %old, i32 addrspace(1)* %out, align 4 ; write the prior value through %out
    378   ret void
    379 }
    380 
    381 ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
    382 ; SICIVI: s_mov_b32 m0
    383 ; GFX9-NOT: m0
    384 
    385 ; EG: LDS_MAX_UINT_RET *
    386 ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    387 ; GCN: s_endpgm
    388 define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
    389   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    390   %old = atomicrmw umax i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned maximum with 4, capture the prior value
    391   store i32 %old, i32 addrspace(1)* %out, align 4
    392   ret void
    393 }
    394 
    395 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
    396 ; SICIVI-DAG: s_mov_b32 m0
    397 ; GFX9-NOT: m0
    398 
    399 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
    400 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
    401 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
    402 ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
    403 ; GCN: s_endpgm
    404 define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    405   %unused = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ; exchange in 4, previous contents are never read
    406   ret void
    407 }
    408 
    409 ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
    410 ; SICIVI: s_mov_b32 m0
    411 ; GFX9-NOT: m0
    412 
    413 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
    414 ; GCN: s_endpgm
    415 define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    416   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    417   %unused = atomicrmw xchg i32 addrspace(3)* %slot, i32 4 seq_cst ; exchange in 4, previous contents are never read
    418   ret void
    419 }
    420 
    421 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
    422 ; SICIVI-DAG: s_mov_b32 m0
    423 ; GFX9-NOT: m0
    424 
    425 ; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
    426 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
    427 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
    428 ; GCN: ds_add_u32 [[VPTR]], [[DATA]]
    429 ; GCN: s_endpgm
    430 define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    431   %unused = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ; add 4, the pre-add value is never read
    432   ret void
    433 }
    434 
    435 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
    436 ; SICIVI: s_mov_b32 m0
    437 ; GFX9-NOT: m0
    438 
    439 ; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    440 ; GCN: s_endpgm
    441 define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    442   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    443   %unused = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4, the pre-add value is never read
    444   ret void
    445 }
    446 
    447 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
    448 ; SICIVI: s_mov_b32 m0
    449 ; GFX9-NOT: m0
    450 
    451 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
    452 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    453 ; GCN: s_endpgm
    454 define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
    455   %delta = sub i32 %a, %b ; runtime part of the element index
    456   %index = add i32 %delta, 4 ; plus a constant 4 elements (16 bytes)
    457   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %index
    458   %unused = atomicrmw add i32 addrspace(3)* %slot, i32 4 seq_cst ; add 4, the pre-add value is never read
    459   ret void
    460 }
    461 
    462 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
    463 ; SICIVI-DAG: s_mov_b32 m0
    464 ; GFX9-NOT: m0
    465 
    466 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    467 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
    468 ; GCN: s_endpgm
    469 define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    470   %unused = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ; add 1, the pre-add value is never read
    471   ret void
    472 }
    473 
    474 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
    475 ; SICIVI-DAG: s_mov_b32 m0
    476 ; GFX9-NOT: m0
    477 
    478 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    479 ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
    480 ; GCN: s_endpgm
    481 define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    482   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    483   %unused = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1, the pre-add value is never read
    484   ret void
    485 }
    486 
    487 ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
    488 ; SICIVI: s_mov_b32 m0
    489 ; GFX9-NOT: m0
    490 
    491 ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
    492 ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    493 ; GCN: s_endpgm
    494 define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
    495   %delta = sub i32 %a, %b ; runtime part of the element index
    496   %index = add i32 %delta, 4 ; plus a constant 4 elements (16 bytes)
    497   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 %index
    498   %unused = atomicrmw add i32 addrspace(3)* %slot, i32 1 seq_cst ; add 1, the pre-add value is never read
    499   ret void
    500 }
    501 
    502 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
    503 ; SICIVI: s_mov_b32 m0
    504 ; GFX9-NOT: m0
    505 
    506 ; GCN: ds_sub_u32
    507 ; GCN: s_endpgm
    508 define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    509   %unused = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ; subtract 4, the prior value is never read
    510   ret void
    511 }
    512 
    513 ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
    514 ; SICIVI: s_mov_b32 m0
    515 ; GFX9-NOT: m0
    516 
    517 ; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    518 ; GCN: s_endpgm
    519 define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    520   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    521   %unused = atomicrmw sub i32 addrspace(3)* %slot, i32 4 seq_cst ; subtract 4, the prior value is never read
    522   ret void
    523 }
    524 
    525 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
    526 ; SICIVI-DAG: s_mov_b32 m0
    527 ; GFX9-NOT: m0
    528 
    529 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    530 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
    531 ; GCN: s_endpgm
    532 define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    533   %unused = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ; subtract 1, the prior value is never read
    534   ret void
    535 }
    536 
    537 ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
    538 ; SICIVI-DAG: s_mov_b32 m0
    539 ; GFX9-NOT: m0
    540 
    541 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
    542 ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
    543 ; GCN: s_endpgm
    544 define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    545   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    546   %unused = atomicrmw sub i32 addrspace(3)* %slot, i32 1 seq_cst ; subtract 1, the prior value is never read
    547   ret void
    548 }
    549 
    550 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
    551 ; SICIVI: s_mov_b32 m0
    552 ; GFX9-NOT: m0
    553 
    554 ; GCN: ds_and_b32
    555 ; GCN: s_endpgm
    556 define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    557   %unused = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-and with 4, the prior value is never read
    558   ret void
    559 }
    560 
    561 ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
    562 ; SICIVI: s_mov_b32 m0
    563 ; GFX9-NOT: m0
    564 
    565 ; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    566 ; GCN: s_endpgm
    567 define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    568   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    569   %unused = atomicrmw and i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-and with 4, the prior value is never read
    570   ret void
    571 }
    572 
    573 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
    574 ; SICIVI: s_mov_b32 m0
    575 ; GFX9-NOT: m0
    576 
    577 ; GCN: ds_or_b32
    578 ; GCN: s_endpgm
    579 define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    580   %unused = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-or with 4, the prior value is never read
    581   ret void
    582 }
    583 
    584 ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
    585 ; SICIVI: s_mov_b32 m0
    586 ; GFX9-NOT: m0
    587 
    588 ; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    589 ; GCN: s_endpgm
    590 define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    591   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    592   %unused = atomicrmw or i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-or with 4, the prior value is never read
    593   ret void
    594 }
    595 
    596 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
    597 ; SICIVI: s_mov_b32 m0
    598 ; GFX9-NOT: m0
    599 
    600 ; GCN: ds_xor_b32
    601 ; GCN: s_endpgm
    602 define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    603   %unused = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ; bitwise-xor with 4, the prior value is never read
    604   ret void
    605 }
    606 
    607 ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
    608 ; SICIVI: s_mov_b32 m0
    609 ; GFX9-NOT: m0
    610 
    611 ; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    612 ; GCN: s_endpgm
    613 define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    614   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    615   %unused = atomicrmw xor i32 addrspace(3)* %slot, i32 4 seq_cst ; bitwise-xor with 4, the prior value is never read
    616   ret void
    617 }
    618 
    619 ; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
    620 ; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
    621 ; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    622 ;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
    623 ;   ret void
    624 ; }
    625 
    626 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
    627 ; SICIVI: s_mov_b32 m0
    628 ; GFX9-NOT: m0
    629 
    630 ; GCN: ds_min_i32
    631 ; GCN: s_endpgm
    632 define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    633   %unused = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed minimum with 4, the prior value is never read
    634   ret void
    635 }
    636 
    637 ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
    638 ; SICIVI: s_mov_b32 m0
    639 ; GFX9-NOT: m0
    640 
    641 ; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    642 ; GCN: s_endpgm
    643 define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    644   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    645   %unused = atomicrmw min i32 addrspace(3)* %slot, i32 4 seq_cst ; signed minimum with 4, the prior value is never read
    646   ret void
    647 }
    648 
    649 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
    650 ; SICIVI: s_mov_b32 m0
    651 ; GFX9-NOT: m0
    652 
    653 ; GCN: ds_max_i32
    654 ; GCN: s_endpgm
    655 define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    656   %unused = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ; signed maximum with 4, the prior value is never read
    657   ret void
    658 }
    659 
    660 ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
    661 ; SICIVI: s_mov_b32 m0
    662 ; GFX9-NOT: m0
    663 
    664 ; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    665 ; GCN: s_endpgm
    666 define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    667   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    668   %unused = atomicrmw max i32 addrspace(3)* %slot, i32 4 seq_cst ; signed maximum with 4, the prior value is never read
    669   ret void
    670 }
    671 
    672 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
    673 ; SICIVI: s_mov_b32 m0
    674 ; GFX9-NOT: m0
    675 
    676 ; GCN: ds_min_u32
    677 ; GCN: s_endpgm
    678 define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    679   %unused = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned minimum with 4, the prior value is never read
    680   ret void
    681 }
    682 
    683 ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
    684 ; SICIVI: s_mov_b32 m0
    685 ; GFX9-NOT: m0
    686 
    687 ; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    688 ; GCN: s_endpgm
    689 define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    690   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    691   %unused = atomicrmw umin i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned minimum with 4, the prior value is never read
    692   ret void
    693 }
    694 
    695 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
    696 ; SICIVI: s_mov_b32 m0
    697 ; GFX9-NOT: m0
    698 
    699 ; GCN: ds_max_u32
    700 ; GCN: s_endpgm
    701 define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
    702   %unused = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ; unsigned maximum with 4, the prior value is never read
    703   ret void
    704 }
    705 
    706 ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
    707 ; SICIVI: s_mov_b32 m0
    708 ; GFX9-NOT: m0
    709 
    710 ; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
    711 ; GCN: s_endpgm
    712 define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
    713   %slot = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 ; element 4, i.e. 16 bytes past %ptr
    714   %unused = atomicrmw umax i32 addrspace(3)* %slot, i32 4 seq_cst ; unsigned maximum with 4, the prior value is never read
    715   ret void
    716 }
    717