Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
      4 declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
      5 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
      6 declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
        ; i32 atomic-inc intrinsic, one declaration per pointer address space
        ; (1, 3 and 4 -- used by the global_*, lds_* and flat_* tests respectively).
      7 
      8 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
      9 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
     10 declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
        ; i64 counterparts of the three i32 declarations, same address spaces.
     11 
     12 declare i32 @llvm.amdgcn.workitem.id.x() #1
        ; Work-item id in x; the *_addr64 and atomic_inc_shl_base_* tests index memory with it.
     13 
     14 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
     15 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     16 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
        ; i32 atomic inc on an addrspace(3) pointer whose result is stored to %out.
        ; The checks expect the constant 42 in a VGPR that feeds the returning
        ; ds_inc_rtn_u32 form.
     17 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     18   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
     19   store i32 %result, i32 addrspace(1)* %out
     20   ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
     24 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     25 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
        ; Returning i32 atomic inc through an addrspace(3) pointer advanced by
        ; 4 x i32; the check expects those 16 bytes in the instruction's offset field.
     26 define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
     27   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     28   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
     29   store i32 %result, i32 addrspace(1)* %out
     30   ret void
     31 }
     32 
     33 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
     34 ; GCN: s_load_dword [[SPTR:s[0-9]+]],
     35 ; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
     36 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
     37 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
        ; i32 atomic inc on an addrspace(3) pointer with the result left unused, so
        ; the non-returning ds_inc_u32 form is expected. The label check uses the GCN
        ; prefix (the only common prefix the RUN lines enable) and the data constant
        ; is checked as the full value 42 passed to the intrinsic.
     38 define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
     39   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
     40   ret void
     41 }
     42 
     43 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
     44 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     45 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
        ; Non-returning i32 atomic inc through an addrspace(3) pointer advanced by
        ; 4 x i32 (16 bytes expected in the offset field). The label check uses the
        ; GCN prefix so that it is actually enabled by the RUN lines.
     46 define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
     47   %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
     48   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
     49   ret void
     50 }
     51 
     52 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
     53 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     54 ; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
        ; i32 atomic inc on an addrspace(1) pointer whose result is stored to %out.
        ; The checks expect buffer_atomic_inc with 42 in the data VGPR and a trailing
        ; glc, with nothing after it on the line.
     55 define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     56   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
     57   store i32 %result, i32 addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
     62 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     63 ; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
        ; Returning i32 atomic inc through an addrspace(1) pointer advanced by
        ; 4 x i32; the check expects offset:16 followed by glc on the instruction.
     64 define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     65   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
     66   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     67   store i32 %result, i32 addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
        ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     72 ; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
        ; i32 atomic inc on an addrspace(1) pointer with the result left unused; the
        ; instruction line is expected to end right after the soffset operand (no glc).
        ; K is bound inside this test instead of reusing the register captured by an
        ; earlier function, and the label check uses the enabled GCN prefix.
     73 define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
     74   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
     75   ret void
     76 }
     77 
     78 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
     79 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     80 ; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
        ; Non-returning i32 atomic inc through an addrspace(1) pointer advanced by
        ; 4 x i32; offset:16 is expected as the last token on the line (no glc).
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
     81 define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
     82   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
     83   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     84   ret void
     85 }
     86 
     87 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
     88 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
     89 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
     90 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
        ; Returning i32 atomic inc on an addrspace(1) pointer indexed by the work-item
        ; id and then by a constant 5 elements. The CI run expects the addr64 buffer
        ; form carrying offset:20; the VI run expects flat_atomic_inc with no offset
        ; operand. The result goes to %out indexed by the same work-item id.
     91 define void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
     92   %id = call i32 @llvm.amdgcn.workitem.id.x()
     93   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
     94   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
     95   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
     96   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
     97   store i32 %result, i32 addrspace(1)* %out.gep
     98   ret void
     99 }
    100 
    101 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
    102 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    103 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
    104 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
        ; Non-returning variant of the work-item-indexed addrspace(1) case: %result
        ; is unused, and both expected instructions end without glc. The CI run still
        ; expects the constant 5 x i32 as offset:20 on the addr64 buffer form.
    105 define void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
    106   %id = call i32 @llvm.amdgcn.workitem.id.x()
    107   %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
    108   %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
    109   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
    110   ret void
    111 }
    112 
    113 @lds0 = addrspace(3) global [512 x i32] undef, align 4
    114 
    115 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
    116 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
    117 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
        ; Indexes @lds0 with (work-item id + 2). The checks expect the address to be
        ; formed by a shift-left by 2 with offset:8 on the ds instruction. %idx.0 is
        ; also stored to %add_use, giving the add a second user besides the address.
    118 define void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    119   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    120   %idx.0 = add nsw i32 %tid.x, 2
    121   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
    122   %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
    123   store i32 %idx.0, i32 addrspace(1)* %add_use
    124   store i32 %val0, i32 addrspace(1)* %out
    125   ret void
    126 }
    127 
    128 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
    129 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    130 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    131 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
        ; i64 atomic inc on an addrspace(3) pointer with the result stored to %out.
        ; The checks expect 42 and 0 moved into two VGPRs (in either order) that are
        ; then used together as the KLO:KHI register range of ds_inc_rtn_u64.
    132 define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    133   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
    134   store i64 %result, i64 addrspace(1)* %out
    135   ret void
    136 }
    137 
    138 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
    139 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    140 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    141 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
        ; Returning i64 atomic inc through an addrspace(3) pointer advanced by
        ; 4 x i64; the check expects those 32 bytes as offset:32 on the instruction.
    142 define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
    143   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    144   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
    145   store i64 %result, i64 addrspace(1)* %out
    146   ret void
    147 }
    148 
    149 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
    150 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    151 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    152 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
        ; i64 atomic inc on an addrspace(3) pointer with the result left unused, so
        ; the non-returning ds_inc_u64 form is expected with the 42/0 register pair.
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    153 define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
    154   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
    155   ret void
    156 }
    157 
    158 ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
    159 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    160 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    161 ; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
        ; Non-returning i64 atomic inc through an addrspace(3) pointer advanced by
        ; 4 x i64; offset:32 is expected as the last token on the instruction line.
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    162 define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
    163   %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
    164   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
    165   ret void
    166 }
    167 
    168 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
    169 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    170 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    171 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
        ; i64 atomic inc on an addrspace(1) pointer with the result stored to %out.
        ; The checks expect buffer_atomic_inc_x2 operating on the 42/0 register pair
        ; and ending in glc.
    172 define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    173   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
    174   store i64 %result, i64 addrspace(1)* %out
    175   ret void
    176 }
    177 
    178 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
    179 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    180 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    181 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
        ; Returning i64 atomic inc through an addrspace(1) pointer advanced by
        ; 4 x i64; the check expects offset:32 followed by glc on the instruction.
    182 define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    183   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    184   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    185   store i64 %result, i64 addrspace(1)* %out
    186   ret void
    187 }
    188 
    189 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
    190 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    191 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    192 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
        ; i64 atomic inc on an addrspace(1) pointer with the result left unused; the
        ; instruction line is expected to end right after the soffset operand (no glc).
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    193 define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
    194   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
    195   ret void
    196 }
    197 
    198 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
    199 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    200 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    201 ; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
        ; Non-returning i64 atomic inc through an addrspace(1) pointer advanced by
        ; 4 x i64; offset:32 is expected as the last token on the line (no glc).
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    202 define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
    203   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
    204   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    205   ret void
    206 }
    207 
    208 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
    209 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    210 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    211 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
    212 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
        ; Returning i64 atomic inc on an addrspace(1) pointer indexed by the work-item
        ; id and then by a constant 5 elements. The CI run expects the addr64 buffer
        ; form carrying offset:40; the VI run expects flat_atomic_inc_x2 with no
        ; offset operand. The result goes to %out indexed by the same work-item id.
    213 define void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
    214   %id = call i32 @llvm.amdgcn.workitem.id.x()
    215   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    216   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
    217   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    218   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    219   store i64 %result, i64 addrspace(1)* %out.gep
    220   ret void
    221 }
    222 
    223 ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
    224 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    225 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    226 ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
    227 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
        ; Non-returning variant of the work-item-indexed addrspace(1) i64 case:
        ; %result is unused and both expected instructions end without glc. The CI
        ; run still expects the constant 5 x i64 as offset:40 on the addr64 form.
    228 define void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
    229   %id = call i32 @llvm.amdgcn.workitem.id.x()
    230   %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
    231   %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
    232   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
    233   ret void
    234 }
    235 
    236 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
    237 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    238 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
        ; i32 atomic inc on an addrspace(4) pointer with the result stored to %out
        ; (also addrspace(4)). The checks expect flat_atomic_inc with a destination
        ; VGPR, a 64-bit address register range, 42 as data, and a trailing glc.
    239 define void @flat_atomic_inc_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    240   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
    241   store i32 %result, i32 addrspace(4)* %out
    242   ret void
    243 }
    244 
    245 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
    246 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    247 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
        ; Returning i32 atomic inc through an addrspace(4) pointer advanced by
        ; 4 x i32. Unlike the LDS and buffer cases, the expected instruction carries
        ; no offset operand: the line ends right after glc.
    248 define void @flat_atomic_inc_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    249   %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
    250   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    251   store i32 %result, i32 addrspace(4)* %out
    252   ret void
    253 }
    254 
    255 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
        ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    256 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
        ; i32 atomic inc on an addrspace(4) pointer with the result left unused; the
        ; expected instruction has no destination VGPR and no glc. K is bound inside
        ; this test instead of reusing the register captured by an earlier function,
        ; and the label check uses the enabled GCN prefix.
    257 define void @flat_atomic_inc_noret_i32(i32 addrspace(4)* %ptr) nounwind {
    258   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42)
    259   ret void
    260 }
    261 
    262 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
    263 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    264 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
        ; Non-returning i32 atomic inc through an addrspace(4) pointer advanced by
        ; 4 x i32; the expected instruction carries neither an offset operand nor glc.
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    265 define void @flat_atomic_inc_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind {
    266   %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4
    267   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    268   ret void
    269 }
    270 
    271 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
    272 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    273 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
        ; Returning i32 atomic inc on an addrspace(4) pointer indexed by the work-item
        ; id and then by a constant 5 elements. Both runs share one expectation:
        ; flat_atomic_inc ending in glc with no offset operand. The result is stored
        ; to %out indexed by the same work-item id.
    274 define void @flat_atomic_inc_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 {
    275   %id = call i32 @llvm.amdgcn.workitem.id.x()
    276   %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
    277   %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id
    278   %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
    279   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    280   store i32 %result, i32 addrspace(4)* %out.gep
    281   ret void
    282 }
    283 
    284 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
    285 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
    286 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
        ; Non-returning variant of the work-item-indexed addrspace(4) case: %result
        ; is unused and the expected flat_atomic_inc has no destination VGPR, no
        ; offset operand and no glc.
    287 define void @flat_atomic_inc_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 {
    288   %id = call i32 @llvm.amdgcn.workitem.id.x()
    289   %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id
    290   %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5
    291   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42)
    292   ret void
    293 }
    294 
    295 @lds1 = addrspace(3) global [512 x i64] undef, align 8
    296 
    297 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
    298 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
    299 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
        ; Indexes @lds1 with (work-item id + 2). The checks expect the address to be
        ; formed by a shift-left by 3 with offset:16 on the ds instruction. %idx.0 is
        ; also stored to %add_use, giving the add a second user besides the address.
    300 define void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
    301   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
    302   %idx.0 = add nsw i32 %tid.x, 2
    303   %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
    304   %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
    305   store i32 %idx.0, i32 addrspace(1)* %add_use
    306   store i64 %val0, i64 addrspace(1)* %out
    307   ret void
    308 }
    309 
    310 attributes #0 = { nounwind }
    311 attributes #1 = { nounwind readnone }
    312 attributes #2 = { nounwind argmemonly }
        ; #0 is attached to most test function definitions (the others spell nounwind
        ; inline), #1 to llvm.amdgcn.workitem.id.x, and #2 to the
        ; llvm.amdgcn.atomic.inc.* declarations.
    313 
    314 
    315 
    316 
    317 
    318 
    319 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
    320 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    321 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    322 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
        ; i64 atomic inc on an addrspace(4) pointer with the result stored to %out.
        ; The checks expect flat_atomic_inc_x2 with a destination range, an address
        ; range, the 42/0 register pair as data, and a trailing glc.
    323 define void @flat_atomic_inc_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    324   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
    325   store i64 %result, i64 addrspace(4)* %out
    326   ret void
    327 }
    328 
    329 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
    330 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    331 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    332 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
        ; Returning i64 atomic inc through an addrspace(4) pointer advanced by
        ; 4 x i64. The expected instruction carries no offset operand: the line ends
        ; right after glc.
    333 define void @flat_atomic_inc_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    334   %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
    335   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    336   store i64 %result, i64 addrspace(4)* %out
    337   ret void
    338 }
    339 
    340 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
    341 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    342 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    343 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
        ; i64 atomic inc on an addrspace(4) pointer with the result left unused; the
        ; expected flat_atomic_inc_x2 has no destination range and no glc.
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    344 define void @flat_atomic_inc_noret_i64(i64 addrspace(4)* %ptr) nounwind {
    345   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42)
    346   ret void
    347 }
    348 
    349 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
    350 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    351 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    352 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
        ; Non-returning i64 atomic inc through an addrspace(4) pointer advanced by
        ; 4 x i64; the expected instruction carries neither an offset operand nor glc.
        ; The label check uses the GCN prefix so that it is enabled by the RUN lines.
    353 define void @flat_atomic_inc_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind {
    354   %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4
    355   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    356   ret void
    357 }
    358 
    359 ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
    360 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    361 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    362 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
        ; Returning i64 atomic inc on an addrspace(4) pointer indexed by the work-item
        ; id and then by a constant 5 elements. Both runs share one expectation:
        ; flat_atomic_inc_x2 ending in glc with no offset operand. The result is
        ; stored to %out indexed by the same work-item id.
    363 define void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 {
    364   %id = call i32 @llvm.amdgcn.workitem.id.x()
    365   %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
    366   %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id
    367   %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
    368   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    369   store i64 %result, i64 addrspace(4)* %out.gep
    370   ret void
    371 }
    372 
    373 ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
    374 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
    375 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
    376 ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
        ; Non-returning variant of the work-item-indexed addrspace(4) i64 case:
        ; %result is unused and the expected flat_atomic_inc_x2 has no destination
        ; range, no offset operand and no glc.
    377 define void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 {
    378   %id = call i32 @llvm.amdgcn.workitem.id.x()
    379   %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id
    380   %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5
    381   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42)
    382   ret void
    383 }
    384