; Codegen test for loads on amdgcn. llc is run for gfx803 and gfx900, each with
; and without the amdhsa OS component in the triple, and its output is piped
; through FileCheck.
;
; Part 1: `load atomic` through a generic i32* pointer, once per combination of
;         syncscope (none, singlethread, agent, workgroup, wavefront) and
;         ordering (unordered, monotonic, acquire, seq_cst). The directives pin
;         whether the flat load carries glc and whether "s_waitcnt vmcnt(0)" /
;         "buffer_wbinvl1_vol" appear around it.
; Part 2: plain loads tagged with !nontemporal from addrspace(5), addrspace(1),
;         addrspace(3) and generic pointers.
;
; Prefix usage: GCN is enabled in all four runs; GFX8 and GFX9 are enabled only
; for their own -mcpu; GFX89 is enabled in all four runs alongside GCN.

; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s

; Used by the *_1 nontemporal tests to index the input pointer.
declare i32 @llvm.amdgcn.workitem.id.x()

; ----------------------------------------------------------------------------
; Atomic loads with no syncscope (functions named system_*).
; ----------------------------------------------------------------------------

; unordered: flat load without glc; no "s_waitcnt vmcnt(0)" before or after the
; load and no buffer_wbinvl1_vol before the store of the loaded register.
; GCN-LABEL: {{^}}system_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in unordered, align 4
  store i32 %val, i32* %out
  ret void
}

; monotonic: the load must carry glc; still no vmcnt(0) wait around it and no
; buffer_wbinvl1_vol before the store.
; GCN-LABEL: {{^}}system_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in monotonic, align 4
  store i32 %val, i32* %out
  ret void
}

; acquire: glc load, with "s_waitcnt vmcnt(0)" on the very next output line and
; buffer_wbinvl1_vol on the line after that. No vmcnt(0) wait before the load.
; GCN-LABEL: {{^}}system_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_acquire(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in acquire, align 4
  store i32 %val, i32* %out
  ret void
}

; seq_cst: "s_waitcnt vmcnt(0)" directly before the glc load, then the same
; wait + buffer_wbinvl1_vol pair directly after it.
; GCN-LABEL: {{^}}system_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_seq_cst(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("singlethread"): all four orderings carry the same expectations,
; i.e. a flat load without glc, no vmcnt(0) wait around it and no
; buffer_wbinvl1_vol before the store.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}singlethread_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}singlethread_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}singlethread_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}singlethread_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("agent"): the directives mirror the no-syncscope tests above.
; ----------------------------------------------------------------------------

; unordered: flat load without glc, no vmcnt(0) wait, no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
  store i32 %val, i32* %out
  ret void
}

; monotonic: glc load, no vmcnt(0) wait, no buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
  store i32 %val, i32* %out
  ret void
}

; acquire: glc load immediately followed by "s_waitcnt vmcnt(0)" and then
; buffer_wbinvl1_vol.
; GCN-LABEL: {{^}}agent_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_acquire(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
  store i32 %val, i32* %out
  ret void
}

; seq_cst: vmcnt(0) wait directly before the glc load, then wait +
; buffer_wbinvl1_vol directly after it.
; GCN-LABEL: {{^}}agent_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_seq_cst(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("workgroup"): every ordering expects a flat load without glc and
; neither a vmcnt(0) wait nor buffer_wbinvl1_vol.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}workgroup_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
  store i32 %val, i32* %out
  ret void
}

; Unlike the agent and no-syncscope acquire tests, no wait or
; buffer_wbinvl1_vol may follow the load here.
; GCN-LABEL: {{^}}workgroup_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_acquire(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
  store i32 %val, i32* %out
  ret void
}

; Unlike the agent and no-syncscope seq_cst tests, no vmcnt(0) wait is allowed
; on either side of the load.
; GCN-LABEL: {{^}}workgroup_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_seq_cst(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; ----------------------------------------------------------------------------
; syncscope("wavefront"): same expectations as the singlethread tests for all
; four orderings (no glc, no vmcnt(0) wait, no buffer_wbinvl1_vol).
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}wavefront_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}wavefront_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}wavefront_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}wavefront_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
    i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; ----------------------------------------------------------------------------
; Non-atomic loads tagged !nontemporal. The *_0 variants load straight from the
; kernel argument pointer; the *_1 variants first index it by the workitem id.
; ----------------------------------------------------------------------------

; addrspace(5): expects buffer_load_dword with offen and both glc and slc set.
; GCN-LABEL: {{^}}nontemporal_private_0:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_0(
    i32 addrspace(5)* %in, i32* %out) {
entry:
  %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}nontemporal_private_1:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
define amdgpu_kernel void @nontemporal_private_1(
    i32 addrspace(5)* %in, i32* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid
  %val = load i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; addrspace(1), unindexed: expects s_load_dword at offset 0x0 with nothing
; after the offset.
; NOTE(review): no glc/slc is checked here, presumably because the load is
; selected as a scalar load - confirm.
; GCN-LABEL: {{^}}nontemporal_global_0:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
define amdgpu_kernel void @nontemporal_global_0(
    i32 addrspace(1)* %in, i32* %out) {
entry:
  %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; addrspace(1), indexed: gfx803 runs expect flat_load_dword and gfx900 runs
; expect global_load_dword with "off"; both must carry glc and slc.
; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
define amdgpu_kernel void @nontemporal_global_1(
    i32 addrspace(1)* %in, i32* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; addrspace(3): expects ds_read_b32 with nothing after the address operand.
; GCN-LABEL: {{^}}nontemporal_local_0:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_0(
    i32 addrspace(3)* %in, i32* %out) {
entry:
  %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}nontemporal_local_1:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @nontemporal_local_1(
    i32 addrspace(3)* %in, i32* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid
  %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; Generic pointer: expects flat_load_dword with both glc and slc set.
; GCN-LABEL: {{^}}nontemporal_flat_0:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_0(
    i32* %in, i32* %out) {
entry:
  %val = load i32, i32* %in, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}nontemporal_flat_1:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
define amdgpu_kernel void @nontemporal_flat_1(
    i32* %in, i32* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
  %val = load i32, i32* %val.gep, align 4, !nontemporal !0
  store i32 %val, i32* %out
  ret void
}

; Metadata node referenced by every !nontemporal attachment above.
!0 = !{i32 1}