; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset:
; EG: LDS_ADD_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset:
; EG: LDS_SUB_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *

; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_AND_RET *
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_OR_RET *
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_XOR_RET *
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:
; define amdgpu_kernel void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   store i32 %result, i32 addrspace(1)* %out, align 4
;   ret void
; }

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_INT_RET *
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_INT_RET *
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MIN_UINT_RET *
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_MAX_UINT_RET *
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_add_u32 [[VPTR]], [[DATA]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
  %sub = sub i32 %a, %b
  %add = add i32 %sub, 4
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}}
; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:
; define amdgpu_kernel void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
;   ret void
; }

; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
  ret void
}

; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
  ret void
}