; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   store i64 %result, i64 addrspace(1)* %out, align 8
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}