; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s

; Codegen test for byval struct arguments on amdgcn-amd-amdhsa.
; It covers byval arguments as seen by a callee (leaf and non-leaf) and the
; copies a caller makes for an outgoing byval argument, both from an ordinary
; function and from an amdgpu_kernel entry point.

%struct.ByValStruct = type { [4 x i32] }

; Leaf callee: volatile read-modify-write of the first i32 of each byval
; argument. The directives expect s32 to be copied into s5 once, every
; byval access to be addressed through s5 (offsets 4 and 20), and s32 not to
; appear again between those accesses.
; GCN-LABEL: {{^}}void_func_byval_struct:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
; GCN-NOT: s32
define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}

; Non-leaf callee: same accesses as above, but the second store is issued
; after a call to an external function, so %add3 is live across the call.
; The directives expect v32/v33 to be stored before and reloaded after the
; call (presumably callee-saved register spills), s32 to be advanced by 0xc00
; in the prologue and rewound by the same amount before returning, and s32
; itself never to be the operand of v_writelane_b32 / v_readlane_b32.
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN-DAG: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}

; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]

; GCN: s_swappc_b64

; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}

; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
; GCN: buffer_load_dword v32,
; GCN: buffer_load_dword v33,
; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN: s_setpc_b64
define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4
  %add = add nsw i32 %tmp, 1
  store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4
  %add3 = add nsw i32 %tmp1, 2
  call void @external_void_func_void()
  store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4
  store volatile i32 9, i32 addrspace(1)* null, align 4
  ret void
}

; Caller (ordinary function): initialises two stack structs with 9 and 13 and
; passes both byval. The directives expect each struct to be read dword by
; dword relative to s5 (offsets 8-20 and 24-36) and written relative to s32
; (offsets 4-16 and 20-32) ahead of the call, with a single 0xc00 adjustment
; of s32 and no additional 0x800 adjustment around the call.
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: v_writelane_b32

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32

; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32

; GCN-NOT: s_sub_u32 s32, s32, 0x800

; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

; Caller (kernel entry point): same IR body as the function above. Here the
; directives expect s33 to be initialised from s7, s32 to be derived as
; s33 + 0xc00, the local structs to be addressed through s33 and the
; outgoing copies through s32, and the kernel to end with s_endpgm without
; any s_sub_u32 on s32 after the call.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0xc00{{$}}

; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32


; GCN: s_swappc_b64
; GCN-NOT: s_sub_u32 s32
; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

; Same kernel body compiled with "no-frame-pointer-elim"="true" (attribute
; group #2). Only the label is matched, so this case just has to compile
; cleanly under -verify-machineinstrs; no instruction sequence is pinned.
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim:
define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 {
entry:
  %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5)
  %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp)
  %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)*
  call void @llvm.lifetime.start.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0
  store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0
  store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4
  call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp1)
  call void @llvm.lifetime.end.p5i8(i64 32, i8 addrspace(5)* %tmp)
  ret void
}

declare void @external_void_func_void() #0

declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3

attributes #0 = { nounwind }
attributes #1 = { noinline norecurse nounwind }
attributes #2 = { nounwind norecurse "no-frame-pointer-elim"="true" }
; Group #3 is referenced by the lifetime intrinsic declarations above; define
; it explicitly so the module has no dangling attribute-group reference.
attributes #3 = { argmemonly nounwind }