; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Two dynamically indexed stores into a private [5 x i32] array, followed by
; loads of elements 0 and 1 that are written to %out.
; The R600 run expects LDS_WRITE/LDS_READ, the SI-PROMOTE runs expect
; ds_write_b32/ds_read_b32, and the SI-ALLOCA runs expect buffer_store_dword
; with the offen addressing bit.

; FUNC-LABEL: {{^}}mova_same_clause:

; R600: LDS_WRITE
; R600: LDS_WRITE
; R600: LDS_READ
; R600: LDS_READ

; NOTE(review): 5120 is presumably 20 bytes per work-item times 256
; work-items; confirm against the promote-alloca sizing rules.
; HSA-PROMOTE: .amd_kernel_code_t
; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120
; HSA-PROMOTE: .end_amd_kernel_code_t

; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32

; HSA-ALLOCA: .amd_kernel_code_t
; FIXME: Creating the emergency stack slots causes us to over-estimate scratch
; by 4 bytes.
; HSA-ALLOCA: workitem_private_segment_byte_size = 24
; HSA-ALLOCA: .end_amd_kernel_code_t

; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
  %stack = alloca [5 x i32], align 4
  ; First store: the index comes from in[0].
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  ; Second store: the index comes from in[1].
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  ; Read back elements 0 and 1 with constant indices and write them to out[0..1].
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; This test checks that the stack offset is calculated correctly for structs.
; All register loads/stores should be optimized away, so there shouldn't be
; any MOVA instructions.
;
; XXX: This generated code has unnecessary MOVs; we should be able to optimize
; this.
; FUNC-LABEL: {{^}}multiple_structs:
; R600-NOT: MOVA_INT
; SI-NOT: v_movrel
%struct.point = type { i32, i32 }

define void @multiple_structs(i32 addrspace(1)* %out) {
entry:
  ; Two independent private structs; every field is written with a constant
  ; index.
  %a = alloca %struct.point
  %b = alloca %struct.point
  %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
  %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
  %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
  %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
  store i32 0, i32* %a.x.ptr
  store i32 1, i32* %a.y.ptr
  store i32 2, i32* %b.x.ptr
  store i32 3, i32* %b.y.ptr
  ; Re-derive pointers to field 0 of each struct, load both values and store
  ; their sum to %out.
  %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
  %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
  %a.indirect = load i32, i32* %a.indirect.ptr
  %b.indirect = load i32, i32* %b.indirect.ptr
  %0 = add i32 %a.indirect, %b.indirect
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Test direct access of a private array inside a loop. The private array
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.
; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
; SI-NOT: v_movrel

define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %prv_array_const = alloca [2 x i32]
  %prv_array = alloca [2 x i32]
  ; Copy in[0] and in[1] into %prv_array_const.
  %a = load i32, i32 addrspace(1)* %in
  %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %b = load i32, i32 addrspace(1)* %b_src_ptr
  %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
  store i32 %a, i32* %a_dst_ptr
  %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
  store i32 %b, i32* %b_dst_ptr
  br label %for.body

for.body:
  ; Accumulate %prv_array_const[0] into %prv_array[0] until %count reaches 4095.
  ; NOTE(review): %prv_array[0] is loaded here before any store to it, so the
  ; first iteration reads an uninitialized slot.
  %inc = phi i32 [0, %entry], [%count, %for.body]
  %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
  %x = load i32, i32* %x_ptr
  %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
  %y = load i32, i32* %y_ptr
  %xy = add i32 %x, %y
  store i32 %xy, i32* %y_ptr
  %count = add i32 %inc, 1
  %done = icmp eq i32 %count, 4095
  br i1 %done, label %for.end, label %for.body

for.end:
  %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
  %value = load i32, i32* %value_ptr
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Private [2 x i16] array: two constant-index stores, then a load at a
; run-time index that is sign-extended and written to %out.

; FUNC-LABEL: {{^}}short_array:

; R600: MOVA_INT

; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %0 = alloca [2 x i16]
  %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1
  store i16 0, i16* %1
  store i16 1, i16* %2
  %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
  %4 = load i16, i16* %3
  %5 = sext i16 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void
}

; Same shape as short_array, with i8 elements.

; FUNC-LABEL: {{^}}char_array:

; R600: MOVA_INT

; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %0 = alloca [2 x i8]
  %1 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 1
  store i8 0, i8* %1
  store i8 1, i8* %2
  %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
  %4 = load i8, i8* %3
  %5 = sext i8 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void

}

; Make sure we don't overwrite workitem information with private memory

; FUNC-LABEL: {{^}}work_item_info:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * T0.X

; SI-NOT: v_mov_b32_e{{(32|64)}} v0
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
  %0 = alloca [2 x i32]
  %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
  %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
  store i32 0, i32* %1
  store i32 1, i32* %2
  %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
  %4 = load i32, i32* %3
  ; The work-item id is read after the private array has been accessed.
  %5 = call i32 @llvm.r600.read.tidig.x()
  %6 = add i32 %4, %5
  store i32 %6, i32 addrspace(1)* %out
  ret void
}

; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
;
; FIXME: `R600_CHECK` is not a prefix passed to FileCheck by any RUN line, so
; the two `R600_CHECK` lines below are inert and never define CHAN. The
; negative check that follows them therefore refers to an undefined variable.
; Either switch these lines to the real R600 prefix (after confirming the
; expected output) or drop the negative check.
; FUNC-LABEL: {{^}}no_overlap:
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
; SI: v_mov_b32_e32 v3
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
  %0 = alloca [3 x i8], align 1
  %1 = alloca [2 x i8], align 1
  %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
  %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
  %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
  %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
  %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
  store i8 0, i8* %2
  store i8 1, i8* %3
  store i8 2, i8* %4
  store i8 1, i8* %5
  store i8 0, i8* %6
  ; Both arrays are read at the same run-time index and the results are summed.
  %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
  %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
  %9 = load i8, i8* %7
  %10 = load i8, i8* %8
  %11 = add i8 %9, %10
  %12 = sext i8 %11 to i32
  store i32 %12, i32 addrspace(1)* %out
  ret void
}

; The functions from here down to select_private have no check lines of their
; own. NOTE(review): presumably they only need to compile (and pass
; -verify-machineinstrs on the amdgcn runs) without crashing; confirm.

define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i8]]
  %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
  store i8 0, i8* %gep0
  store i8 1, i8* %gep1
  %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i8, i8* %gep2
  %sext = sext i8 %load to i32
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x i64]]
  %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
  store i64 0, i64* %gep0
  store i64 1, i64* %gep1
  %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i64, i64* %gep2
  store i64 %load, i64 addrspace(1)* %out
  ret void
}

%struct.pair32 = type { i32, i32 }

define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x [2 x %struct.pair32]]
  %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
  %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
entry:
  %alloca = alloca [2 x %struct.pair32]
  %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
  %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
  %tmp = alloca [2 x i32]
  %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
  %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
  store i32 0, i32* %tmp1
  store i32 1, i32* %tmp2
  ; The loaded pointer is chosen between the two element pointers by a select.
  %cmp = icmp eq i32 %in, 0
  %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
  %load = load i32, i32* %sel
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it
; finds one, it should stop trying to promote.

; FUNC-LABEL: {{^}}ptrtoint:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %alloca = alloca [16 x i32]
  %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  store i32 5, i32* %tmp0
  ; Round-trip the alloca address through an integer so the pointer escapes
  ; via ptrtoint/inttoptr.
  %tmp1 = ptrtoint [16 x i32]* %alloca to i32
  %tmp2 = add i32 %tmp1, 5
  %tmp3 = inttoptr i32 %tmp2 to i32*
  %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
  %tmp5 = load i32, i32* %tmp4
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}