Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
      2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
      3 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
      4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
        ;
        ; The four invocations above compile this file for gfx803 and gfx900, each
        ; once with the triple amdgcn-amd- and once with amdgcn-amd-amdhsa.
        ; Every invocation enables the GCN and GFX89 check prefixes; the gfx803
        ; ones additionally enable GFX8 and the gfx900 ones GFX9.
        ;
        ; Each kernel below performs one load (atomic with a given ordering and
        ; syncscope, or non-atomic with !nontemporal) and stores the result through
        ; %out; the check lines in front of each kernel pin the expected load
        ; instruction and the instructions expected or forbidden around it.
      5 
      6 declare i32 @llvm.amdgcn.workitem.id.x() ; called by the nontemporal_*_1 kernels to obtain the index used in their getelementptr
      7 
      8 ; GCN-LABEL: {{^}}system_unordered:
      9 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     10 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
     11 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     12 ; GFX89-NOT: buffer_wbinvl1_vol
     13 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Unordered atomic load with no explicit syncscope (the "system" of the
        ; test name), followed by a plain store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
     14 define amdgpu_kernel void @system_unordered(
     15     i32* %in, i32* %out) {
     16 entry:
     17   %val = load atomic i32, i32* %in unordered, align 4
     18   store i32 %val, i32* %out
     19   ret void
     20 }
     21 
     22 ; GCN-LABEL: {{^}}system_monotonic:
     23 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     24 ; GFX89:     flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
     25 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     26 ; GFX89-NOT: buffer_wbinvl1_vol
     27 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Monotonic atomic load with no explicit syncscope (the "system" of the
        ; test name), followed by a plain store of the loaded value.
        ; Expected per the checks above: a flat_load_dword carrying the glc
        ; modifier, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
     28 define amdgpu_kernel void @system_monotonic(
     29     i32* %in, i32* %out) {
     30 entry:
     31   %val = load atomic i32, i32* %in monotonic, align 4
     32   store i32 %val, i32* %out
     33   ret void
     34 }
     35 
     36 ; GCN-LABEL: {{^}}system_acquire:
     37 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
     38 ; GCN:        flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
     39 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     40 ; GFX89-NEXT: buffer_wbinvl1_vol
     41 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Acquire atomic load with no explicit syncscope (the "system" of the
        ; test name), followed by a plain store of the loaded value.
        ; Expected per the checks above: no `s_waitcnt vmcnt(0)` before the load;
        ; a flat_load_dword with glc; then, on consecutive lines, `s_waitcnt
        ; vmcnt(0)` and buffer_wbinvl1_vol; finally the flat_store_dword of the
        ; loaded register.
     42 define amdgpu_kernel void @system_acquire(
     43     i32* %in, i32* %out) {
     44 entry:
     45   %val = load atomic i32, i32* %in acquire, align 4
     46   store i32 %val, i32* %out
     47   ret void
     48 }
     49 
     50 ; GCN-LABEL: {{^}}system_seq_cst:
     51 ; GCN:        s_waitcnt vmcnt(0){{$}}
     52 ; GCN-NEXT:   flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
     53 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
     54 ; GFX89-NEXT: buffer_wbinvl1_vol
     55 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Sequentially consistent atomic load with no explicit syncscope (the
        ; "system" of the test name), followed by a plain store of the loaded value.
        ; Expected per the checks above, on four consecutive lines: `s_waitcnt
        ; vmcnt(0)`, a flat_load_dword with glc, `s_waitcnt vmcnt(0)` again, and
        ; buffer_wbinvl1_vol; the flat_store_dword of the loaded register follows.
     56 define amdgpu_kernel void @system_seq_cst(
     57     i32* %in, i32* %out) {
     58 entry:
     59   %val = load atomic i32, i32* %in seq_cst, align 4
     60   store i32 %val, i32* %out
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}singlethread_unordered:
     65 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     66 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
     67 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     68 ; GFX89-NOT: buffer_wbinvl1_vol
     69 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Unordered atomic load at syncscope("singlethread"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
     70 define amdgpu_kernel void @singlethread_unordered(
     71     i32* %in, i32* %out) {
     72 entry:
     73   %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
     74   store i32 %val, i32* %out
     75   ret void
     76 }
     77 
     78 ; GCN-LABEL: {{^}}singlethread_monotonic:
     79 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     80 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
     81 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     82 ; GFX89-NOT: buffer_wbinvl1_vol
     83 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Monotonic atomic load at syncscope("singlethread"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand (in particular no glc), no `s_waitcnt vmcnt(0)` before
        ; or after it, and no buffer_wbinvl1_vol ahead of the flat_store_dword.
     84 define amdgpu_kernel void @singlethread_monotonic(
     85     i32* %in, i32* %out) {
     86 entry:
     87   %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
     88   store i32 %val, i32* %out
     89   ret void
     90 }
     91 
     92 ; GCN-LABEL: {{^}}singlethread_acquire:
     93 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     94 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
     95 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
     96 ; GFX89-NOT: buffer_wbinvl1_vol
     97 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Acquire atomic load at syncscope("singlethread"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
     98 define amdgpu_kernel void @singlethread_acquire(
     99     i32* %in, i32* %out) {
    100 entry:
    101   %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
    102   store i32 %val, i32* %out
    103   ret void
    104 }
    105 
    106 ; GCN-LABEL: {{^}}singlethread_seq_cst:
    107 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    108 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    109 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    110 ; GFX89-NOT: buffer_wbinvl1_vol
    111 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Sequentially consistent atomic load at syncscope("singlethread"),
        ; followed by a plain store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    112 define amdgpu_kernel void @singlethread_seq_cst(
    113     i32* %in, i32* %out) {
    114 entry:
    115   %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
    116   store i32 %val, i32* %out
    117   ret void
    118 }
    119 
    120 ; GCN-LABEL: {{^}}agent_unordered:
    121 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    122 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    123 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    124 ; GFX89-NOT: buffer_wbinvl1_vol
    125 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Unordered atomic load at syncscope("agent"), followed by a plain store
        ; of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    126 define amdgpu_kernel void @agent_unordered(
    127     i32* %in, i32* %out) {
    128 entry:
    129   %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
    130   store i32 %val, i32* %out
    131   ret void
    132 }
    133 
    134 ; GCN-LABEL: {{^}}agent_monotonic:
    135 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    136 ; GFX89:     flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    137 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    138 ; GFX89-NOT: buffer_wbinvl1_vol
    139 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Monotonic atomic load at syncscope("agent"), followed by a plain store
        ; of the loaded value.
        ; Expected per the checks above: a flat_load_dword carrying the glc
        ; modifier, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    140 define amdgpu_kernel void @agent_monotonic(
    141     i32* %in, i32* %out) {
    142 entry:
    143   %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
    144   store i32 %val, i32* %out
    145   ret void
    146 }
    147 
    148 ; GCN-LABEL: {{^}}agent_acquire:
    149 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    150 ; GCN:        flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    151 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    152 ; GFX89-NEXT: buffer_wbinvl1_vol
    153 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Acquire atomic load at syncscope("agent"), followed by a plain store of
        ; the loaded value.
        ; Expected per the checks above: no `s_waitcnt vmcnt(0)` before the load;
        ; a flat_load_dword with glc; then, on consecutive lines, `s_waitcnt
        ; vmcnt(0)` and buffer_wbinvl1_vol; finally the flat_store_dword of the
        ; loaded register.
    154 define amdgpu_kernel void @agent_acquire(
    155     i32* %in, i32* %out) {
    156 entry:
    157   %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
    158   store i32 %val, i32* %out
    159   ret void
    160 }
    161 
    162 ; GCN-LABEL: {{^}}agent_seq_cst:
    163 ; GCN:        s_waitcnt vmcnt(0){{$}}
    164 ; GCN-NEXT:   flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
    165 ; GCN-NEXT:   s_waitcnt vmcnt(0){{$}}
    166 ; GFX89-NEXT: buffer_wbinvl1_vol
    167 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Sequentially consistent atomic load at syncscope("agent"), followed by a
        ; plain store of the loaded value.
        ; Expected per the checks above, on four consecutive lines: `s_waitcnt
        ; vmcnt(0)`, a flat_load_dword with glc, `s_waitcnt vmcnt(0)` again, and
        ; buffer_wbinvl1_vol; the flat_store_dword of the loaded register follows.
    168 define amdgpu_kernel void @agent_seq_cst(
    169     i32* %in, i32* %out) {
    170 entry:
    171   %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
    172   store i32 %val, i32* %out
    173   ret void
    174 }
    175 
    176 ; GCN-LABEL: {{^}}workgroup_unordered:
    177 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    178 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    179 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    180 ; GFX89-NOT: buffer_wbinvl1_vol
    181 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Unordered atomic load at syncscope("workgroup"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    182 define amdgpu_kernel void @workgroup_unordered(
    183     i32* %in, i32* %out) {
    184 entry:
    185   %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
    186   store i32 %val, i32* %out
    187   ret void
    188 }
    189 
    190 ; GCN-LABEL: {{^}}workgroup_monotonic:
    191 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    192 ; GFX89:     flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    193 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    194 ; GFX89-NOT: buffer_wbinvl1_vol
    195 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Monotonic atomic load at syncscope("workgroup"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand (in particular no glc), no `s_waitcnt vmcnt(0)` before
        ; or after it, and no buffer_wbinvl1_vol ahead of the flat_store_dword.
    196 define amdgpu_kernel void @workgroup_monotonic(
    197     i32* %in, i32* %out) {
    198 entry:
    199   %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
    200   store i32 %val, i32* %out
    201   ret void
    202 }
    203 
    204 ; GCN-LABEL: {{^}}workgroup_acquire:
    205 ; GCN-NOT:    s_waitcnt vmcnt(0){{$}}
    206 ; GFX89:      flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    207 ; GFX89-NOT:  s_waitcnt vmcnt(0){{$}}
    208 ; GFX89-NOT:  buffer_wbinvl1_vol
    209 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Acquire atomic load at syncscope("workgroup"), followed by a plain store
        ; of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    210 define amdgpu_kernel void @workgroup_acquire(
    211     i32* %in, i32* %out) {
    212 entry:
    213   %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
    214   store i32 %val, i32* %out
    215   ret void
    216 }
    217 
    218 ; GCN-LABEL: {{^}}workgroup_seq_cst:
    219 ; GFX89-NOT:  s_waitcnt vmcnt(0){{$}}
    220 ; GFX89:      flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    221 ; GFX89-NOT:  s_waitcnt vmcnt(0){{$}}
    222 ; GFX89-NOT:  buffer_wbinvl1_vol
    223 ; GCN:        flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Sequentially consistent atomic load at syncscope("workgroup"), followed
        ; by a plain store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    224 define amdgpu_kernel void @workgroup_seq_cst(
    225     i32* %in, i32* %out) {
    226 entry:
    227   %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
    228   store i32 %val, i32* %out
    229   ret void
    230 }
    231 
    232 ; GCN-LABEL: {{^}}wavefront_unordered:
    233 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    234 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    235 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    236 ; GFX89-NOT: buffer_wbinvl1_vol
    237 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Unordered atomic load at syncscope("wavefront"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    238 define amdgpu_kernel void @wavefront_unordered(
    239     i32* %in, i32* %out) {
    240 entry:
    241   %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
    242   store i32 %val, i32* %out
    243   ret void
    244 }
    245 
    246 ; GCN-LABEL: {{^}}wavefront_monotonic:
    247 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    248 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    249 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    250 ; GFX89-NOT: buffer_wbinvl1_vol
    251 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Monotonic atomic load at syncscope("wavefront"), followed by a plain
        ; store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand (in particular no glc), no `s_waitcnt vmcnt(0)` before
        ; or after it, and no buffer_wbinvl1_vol ahead of the flat_store_dword.
    252 define amdgpu_kernel void @wavefront_monotonic(
    253     i32* %in, i32* %out) {
    254 entry:
    255   %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
    256   store i32 %val, i32* %out
    257   ret void
    258 }
    259 
    260 ; GCN-LABEL: {{^}}wavefront_acquire:
    261 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    262 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    263 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    264 ; GFX89-NOT: buffer_wbinvl1_vol
    265 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Acquire atomic load at syncscope("wavefront"), followed by a plain store
        ; of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    266 define amdgpu_kernel void @wavefront_acquire(
    267     i32* %in, i32* %out) {
    268 entry:
    269   %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
    270   store i32 %val, i32* %out
    271   ret void
    272 }
    273 
    274 ; GCN-LABEL: {{^}}wavefront_seq_cst:
    275 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    276 ; GCN:       flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
    277 ; GCN-NOT:   s_waitcnt vmcnt(0){{$}}
    278 ; GFX89-NOT: buffer_wbinvl1_vol
    279 ; GCN:       flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
        ;
        ; Sequentially consistent atomic load at syncscope("wavefront"), followed
        ; by a plain store of the loaded value.
        ; Expected per the checks above: a flat_load_dword with nothing after its
        ; address operand, no `s_waitcnt vmcnt(0)` before or after it, and no
        ; buffer_wbinvl1_vol ahead of the flat_store_dword of the loaded register.
    280 define amdgpu_kernel void @wavefront_seq_cst(
    281     i32* %in, i32* %out) {
    282 entry:
    283   %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
    284   store i32 %val, i32* %out
    285   ret void
    286 }
    287 
    288 ; GCN-LABEL: {{^}}nontemporal_private_0:
    289 ; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
        ;
        ; Non-atomic load tagged !nontemporal through an addrspace(5) pointer (the
        ; "private" of the test name), loading directly from the kernel argument
        ; %in and storing the value through %out.
        ; Expected per the check above: a buffer_load_dword ending in
        ; `offen glc slc`.
    290 define amdgpu_kernel void @nontemporal_private_0(
    291     i32 addrspace(5)* %in, i32* %out) {
    292 entry:
    293   %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0
    294   store i32 %val, i32* %out
    295   ret void
    296 }
    297 
    298 ; GCN-LABEL: {{^}}nontemporal_private_1:
    299 ; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
        ;
        ; Same as nontemporal_private_0 except that the !nontemporal load goes
        ; through a getelementptr of %in indexed by the work-item id, so the
        ; address is no longer just the kernel argument.
        ; Expected per the check above: a buffer_load_dword ending in
        ; `offen glc slc`.
    300 define amdgpu_kernel void @nontemporal_private_1(
    301     i32 addrspace(5)* %in, i32* %out) {
    302 entry:
    303   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    304   %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid
    305   %val = load i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0
    306   store i32 %val, i32* %out
    307   ret void
    308 }
    309 
    310 ; GCN-LABEL: {{^}}nontemporal_global_0:
    311 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
        ;
        ; Non-atomic load tagged !nontemporal through an addrspace(1) pointer (the
        ; "global" of the test name), loading directly from the kernel argument
        ; %in and storing the value through %out.
        ; Expected per the check above: an s_load_dword with offset 0x0 and
        ; nothing after it (unlike nontemporal_global_1, no glc/slc is expected).
    312 define amdgpu_kernel void @nontemporal_global_0(
    313     i32 addrspace(1)* %in, i32* %out) {
    314 entry:
    315   %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
    316   store i32 %val, i32* %out
    317   ret void
    318 }
    319 
    320 ; GCN-LABEL: {{^}}nontemporal_global_1:
    321 ; GFX8:  flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
    322 ; GFX9:  global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
        ;
        ; Same as nontemporal_global_0 except that the !nontemporal load goes
        ; through a getelementptr of %in indexed by the work-item id.
        ; Expected per the checks above: on the gfx803 runs a flat_load_dword
        ; ending in `glc slc`; on the gfx900 runs a global_load_dword ending in
        ; `off glc slc`.
    323 define amdgpu_kernel void @nontemporal_global_1(
    324     i32 addrspace(1)* %in, i32* %out) {
    325 entry:
    326   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    327   %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
    328   %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0
    329   store i32 %val, i32* %out
    330   ret void
    331 }
    332 
    333 ; GCN-LABEL: {{^}}nontemporal_local_0:
    334 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
        ;
        ; Non-atomic load tagged !nontemporal through an addrspace(3) pointer (the
        ; "local" of the test name), loading directly from the kernel argument
        ; %in and storing the value through %out.
        ; Expected per the check above: a ds_read_b32 with nothing after its two
        ; register operands.
    335 define amdgpu_kernel void @nontemporal_local_0(
    336     i32 addrspace(3)* %in, i32* %out) {
    337 entry:
    338   %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
    339   store i32 %val, i32* %out
    340   ret void
    341 }
    342 
    343 ; GCN-LABEL: {{^}}nontemporal_local_1:
    344 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
        ;
        ; Same as nontemporal_local_0 except that the !nontemporal load goes
        ; through a getelementptr of %in indexed by the work-item id.
        ; Expected per the check above: a ds_read_b32 with nothing after its two
        ; register operands.
    345 define amdgpu_kernel void @nontemporal_local_1(
    346     i32 addrspace(3)* %in, i32* %out) {
    347 entry:
    348   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    349   %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid
    350   %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0
    351   store i32 %val, i32* %out
    352   ret void
    353 }
    354 
    355 ; GCN-LABEL: {{^}}nontemporal_flat_0:
    356 ; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
        ;
        ; Non-atomic load tagged !nontemporal through a pointer with no addrspace
        ; qualifier (the "flat" of the test name), loading directly from the
        ; kernel argument %in and storing the value through %out.
        ; Expected per the check above: a flat_load_dword ending in `glc slc`.
    357 define amdgpu_kernel void @nontemporal_flat_0(
    358     i32* %in, i32* %out) {
    359 entry:
    360   %val = load i32, i32* %in, align 4, !nontemporal !0
    361   store i32 %val, i32* %out
    362   ret void
    363 }
    364 
    365 ; GCN-LABEL: {{^}}nontemporal_flat_1:
    366 ; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
        ;
        ; Same as nontemporal_flat_0 except that the !nontemporal load goes
        ; through a getelementptr of %in indexed by the work-item id.
        ; Expected per the check above: a flat_load_dword ending in `glc slc`.
    367 define amdgpu_kernel void @nontemporal_flat_1(
    368     i32* %in, i32* %out) {
    369 entry:
    370   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    371   %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid
    372   %val = load i32, i32* %val.gep, align 4, !nontemporal !0
    373   store i32 %val, i32* %out
    374   ret void
    375 }
    376 
    377 !0 = !{i32 1} ; metadata node referenced by every `!nontemporal !0` attachment in the nontemporal_* kernels
    378