; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s

; rdar://12648441
; Generated from arm64-arguments.c with -O2.
; Test passing structs with size < 8, < 16 and > 16
; with alignment of 16 and without

; Structs with size < 8
%struct.s38 = type { i32, i16 }
; With alignment of 16, the size will be padded to multiple of 16 bytes.
%struct.s39 = type { i32, i16, [10 x i8] }
; Structs with size < 16
%struct.s40 = type { i32, i16, i32, i16 }
%struct.s41 = type { i32, i16, i32, i16 }
; Structs with size > 16
%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }

; Two globals per struct type; each caller below passes the pair as arguments.
@g38 = common global %struct.s38 zeroinitializer, align 4
@g38_2 = common global %struct.s38 zeroinitializer, align 4
@g39 = common global %struct.s39 zeroinitializer, align 16
@g39_2 = common global %struct.s39 zeroinitializer, align 16
@g40 = common global %struct.s40 zeroinitializer, align 4
@g40_2 = common global %struct.s40 zeroinitializer, align 4
@g41 = common global %struct.s41 zeroinitializer, align 16
@g41_2 = common global %struct.s41 zeroinitializer, align 16
@g42 = common global %struct.s42 zeroinitializer, align 4
@g42_2 = common global %struct.s42 zeroinitializer, align 4
@g43 = common global %struct.s43 zeroinitializer, align 16
@g43_2 = common global %struct.s43 zeroinitializer, align 16

; structs with size < 8 bytes, passed via i64 in x1 and x2
; The body adds %i, the low 32 bits of each coerced i64, and the
; sign-extended 16-bit value held in bits 32-47 of each coerced i64.
define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f38
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w2
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g38 and @g38_2 as i64 values and passes them to f38 after one i32.
define i32 @caller38() #1 {
entry:
; CHECK-LABEL: caller38
; CHECK: ldr x1,
; CHECK: ldr x2,
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
  ret i32 %call
}

; Same struct arguments as f38, but preceded by nine i32 arguments.
declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0

; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
; i9 at [sp]
define i32 @caller38_stack() #1 {
entry:
; CHECK-LABEL: caller38_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
  ret i32 %call
}

; structs with size < 8 bytes, alignment of 16
; passed via i128 in x1 and x3
; Same arithmetic as f38, performed on the low 64 bits of each i128.
define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f39
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g39 and @g39_2 as i128 values; each is expected in a register pair.
define i32 @caller39() #1 {
entry:
; CHECK-LABEL: caller39
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

; Same struct arguments as f39, but preceded by nine i32 arguments.
declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 8 bytes, alignment 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller39_stack() #1 {
entry:
; CHECK-LABEL: caller39_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size < 16 bytes
; passed via [2 x i64] in x1/x2 and x3/x4
; Only element 0 of each array is read: its low 32 bits and the
; sign-extended 16-bit value in bits 32-47.
define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
entry:
; CHECK-LABEL: f40
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g40 and @g40_2 as [2 x i64] values (align 4) and passes them to f40.
define i32 @caller40() #1 {
entry:
; CHECK-LABEL: caller40
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

; Same struct arguments as f40, but preceded by nine i32 arguments.
declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0

; structs with size < 16 bytes
; passed on stack at [sp+8] and [sp+24]
define i32 @caller40_stack() #1 {
entry:
; CHECK-LABEL: caller40_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

; structs with size < 16 bytes, alignment of 16
; passed via i128 in x1 and x3
; Same arithmetic as f39.
define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f41
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g41 and @g41_2 as i128 values and passes them to f41.
define i32 @caller41() #1 {
entry:
; CHECK-LABEL: caller41
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

; Same struct arguments as f41, but preceded by nine i32 arguments.
declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 16 bytes, alignment of 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller41_stack() #1 {
entry:
; CHECK-LABEL: caller41_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size of 22 bytes, passed indirectly in x1 and x2
; The body loads field 0 (i32) and field 1 (i16, sign-extended) through each
; pointer and adds them to %i.
define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f42
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f42
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; For s1, we allocate a 22-byte space, pass its address via x1
; NOTE(review): the memcpy calls below copy 24 bytes per struct; the "22"
; above presumably excludes tail padding -- confirm.
define i32 @caller42() #3 {
entry:
; CHECK-LABEL: caller42
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller42
; FAST: sub sp, sp, #96
; Space for s1 is allocated at fp-24 = sp+56
; FAST: sub x[[A:[0-9]+]], x29, #24
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; Space for s2 is allocated at sp+32
; FAST: add x[[A:[0-9]+]], sp, #32
; FAST: bl _memcpy
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4

; Same struct pointers as f42, but preceded by nine i32 arguments.
declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
                       %struct.s42* nocapture %s2) #2

; Copies @g42 and @g42_2 into two local temporaries and passes their
; addresses to f42_stack after nine i32 arguments.
define i32 @caller42_stack() #3 {
entry:
; CHECK-LABEL: caller42_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller42_stack
; Space for s1 is allocated at fp-24
; FAST: sub x[[A:[0-9]+]], x29, #24
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; FAST: bl _memcpy
; Space for s2 is allocated at fp-48
; FAST: sub x[[B:[0-9]+]], x29, #48
; Call memcpy again
; FAST: bl _memcpy
; Address of s1 is passed on stack at sp+8
; FAST: str {{w[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                              i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

; structs with size of 22 bytes, alignment of 16
; passed indirectly in x1 and x2
; Same arithmetic as f42, on %struct.s43 pointers.
define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f43
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f43
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Copies @g43 and @g43_2 (32 bytes each, align 16) into two local temporaries
; and passes their addresses to f43.
define i32 @caller43() #3 {
entry:
; CHECK-LABEL: caller43
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller43
; FAST: add x29, sp, #64
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{x[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
; FAST: str {{x[0-9]+}}, [sp, #24]
; FAST: add x1, sp, #32
; FAST: mov x2, sp
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

; Same struct pointers as f43, but preceded by nine i32 arguments.
declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
                       %struct.s43* nocapture %s2) #2

; Copies @g43 and @g43_2 into two local temporaries and passes their
; addresses to f43_stack after nine i32 arguments.
define i32 @caller43_stack() #3 {
entry:
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller43_stack
; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-32 = sp+64
; Space for s2 is allocated at sp+32
; FAST: stur {{x[0-9]+}}, [x29, #-32]
; FAST: stur {{x[0-9]+}}, [x29, #-24]
; FAST: stur {{x[0-9]+}}, [x29, #-16]
; FAST: stur {{x[0-9]+}}, [x29, #-8]
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{w[0-9]+}}, [sp]
; Address of s1 is passed on stack at sp+8
; FAST: sub x[[A:[0-9]+]], x29, #32
; FAST: str x[[A]], [sp, #8]
; FAST: add x[[B:[0-9]+]], sp, #32
; FAST: str x[[B]], [sp, #16]
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                              i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

; rdar://13668927
; Check that we don't split an i128.
; The callee takes seven i32 arguments before the i128 and one i32 after it.
declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)

; Loads @g41 as an i128 and passes it as the eighth argument, followed by a
; trailing i32.
define i32 @i128_split() {
entry:
; CHECK-LABEL: i128_split
; "i128 %0" should be on stack at [sp].
; "i32 8" should be on stack at [sp, #16].
; CHECK: str {{w[0-9]+}}, [sp, #16]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; FAST-LABEL: i128_split
; FAST: sub sp, sp
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
; Load/Store opt is disabled with -O0, so the i128 is split.
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i128 %0, i32 8) #5
  ret i32 %call
}

; Same shape as callee_i128_split, with an i64 in place of the i128.
declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                        i32 %i6, i32 %i7, i64 %s1, i32 %i8)

; Loads the first 8 bytes of @g41 as an i64 and passes it as the eighth
; argument, followed by a trailing i32.
define i32 @i64_split() {
entry:
; CHECK-LABEL: i64_split
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
; CHECK: str {{w[0-9]+}}, [sp]
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
  %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                    i32 6, i32 7, i64 %0, i32 8) #5
  ret i32 %call
}

; Attribute groups referenced by the definitions, declarations and call sites
; in this file.
attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #4 = { nounwind }
attributes #5 = { nobuiltin }

; TBAA type nodes (!0 int, !3 short) and the tbaa.struct layout (!4) used by
; the loads and memcpy calls in this file.
!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}
!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}