; Codegen test for amdgcn functions and kernels that receive, modify, and pass
; byval struct arguments located in addrspace(5).
;
; Each invocation below compiles this file with llc for one subtarget (fiji,
; hawaii), with -enable-ipra=0 and the machine verifier enabled, and pipes the
; assembly to FileCheck. Both invocations share the GCN check prefix; the VI
; and CI prefixes are enabled for subtarget-specific expectations.
;
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; Aggregate passed by value throughout this file: four i32 elements (16 bytes).
%struct.ByValStruct = type { [4 x i32] }

      6 ; GCN-LABEL: {{^}}void_func_byval_struct:
      7 ; GCN: s_mov_b32 s5, s32
      8 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
      9 ; GCN-NOT: s32
     10 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
     11 ; GCN-NOT: s32
     12 
     13 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
     14 ; GCN-NOT: s32
     15 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
     16 ; GCN-NOT: s32
     17 define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
     18 entry:
     19   %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
     20   %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
     21   %add = add nsw i32 %tmp, 1
     22   store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
     23   %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
     24   %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
     25   %add3 = add nsw i32 %tmp1, 2
     26   store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
     27   store volatile i32 9, i32 addrspace(1)* null, align 4
     28   ret void
     29 }
     30 
     31 ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
     32 ; GCN: s_mov_b32 s5, s32
     33 ; GCN-DAG: buffer_store_dword v32
     34 ; GCN-DAG: buffer_store_dword v33
     35 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
     36 ; GCN-DAG: v_writelane_b32
     37 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
     38 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
     39 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
     40 ; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
     41 
     42 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
     43 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
     44 
     45 ; GCN: s_swappc_b64
     46 
     47 ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}
     48 
     49 ; GCN: v_readlane_b32
     50 ; GCN-NOT: v_readlane_b32 s32
     51 ; GCN: buffer_load_dword v32,
     52 ; GCN: buffer_load_dword v33,
     53 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
     54 ; GCN: s_setpc_b64
     55 define void  @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
     56 entry:
     57   %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
     58   %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
     59   %add = add nsw i32 %tmp, 1
     60   store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
     61   %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
     62   %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
     63   %add3 = add nsw i32 %tmp1, 2
     64   call void @external_void_func_void()
     65   store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
     66   store volatile i32 9, i32 addrspace(1)* null, align 4
     67   ret void
     68 }
     69 
     70 ; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
     71 ; GCN: s_mov_b32 s5, s32
     72 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
     73 ; GCN-DAG: v_writelane_b32
     74 
     75 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
     76 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
     77 
     78 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
     79 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
     80 
     81 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
     82 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
     83 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
     84 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
     85 
     86 ; GCN-NOT: s_add_u32 s32, s32, 0x800
     87 
     88 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
     89 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
     90 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
     91 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
     92 
     93 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
     94 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
     95 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
     96 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
     97 
     98 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
     99 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
    100 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
    101 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
    102 
    103 ; GCN: s_swappc_b64
    104 ; GCN-NOT: v_readlane_b32 s32
    105 ; GCN: v_readlane_b32
    106 ; GCN-NOT: v_readlane_b32 s32
    107 
    108 ; GCN-NOT: s_sub_u32 s32, s32, 0x800
    109 
    110 ; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
    111 ; GCN-NEXT: s_waitcnt
    112 ; GCN-NEXT: s_setpc_b64
    113 define void @call_void_func_byval_struct_func() #0 {
    114 entry:
    115   %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
    116   %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
    117   %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
    118   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
    119   %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
    120   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    121   %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
    122   store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
    123   %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
    124   store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
    125   call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
    126   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    127   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
    128   ret void
    129 }
    130 
    131 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
    132 ; GCN: s_mov_b32 s33, s7
    133 ; GCN: s_add_u32 s32, s33, 0xc00{{$}}
    134 
    135 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
    136 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
    137 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
    138 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
    139 
    140 ; GCN-NOT: s_add_u32 s32, s32, 0x800
    141 
    142 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
    143 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
    144 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
    145 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
    146 
    147 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
    148 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
    149 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
    150 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
    151 
    152 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
    153 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
    154 ; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
    155 ; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36
    156 
    157 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
    158 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
    159 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
    160 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
    161 
    162 
    163 ; GCN: s_swappc_b64
    164 ; GCN-NOT: s_sub_u32 s32
    165 ; GCN: s_endpgm
    166 define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
    167 entry:
    168   %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
    169   %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
    170   %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
    171   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
    172   %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
    173   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    174   %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
    175   store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
    176   %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
    177   store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
    178   call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
    179   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    180   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
    181   ret void
    182 }
    183 
    184 ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
    185 define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
    186 entry:
    187   %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
    188   %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
    189   %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
    190   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
    191   %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
    192   call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    193   %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
    194   store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
    195   %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
    196   store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
    197   call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
    198   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
    199   call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
    200   ret void
    201 }
    202 
    203 declare void @external_void_func_void() #0
    204 
    205 declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
    206 declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3
    207 
    208 attributes #0 = { nounwind }
    209 attributes #1 = { noinline norecurse nounwind }
    210 attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" }
    211