Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
      2 ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
      3 target triple = "arm64-apple-darwin"
      4 
      5 ; rdar://12648441
      6 ; Generated from arm64-arguments.c with -O2.
      7 ; Test passing structs with size < 8, < 16 and > 16
      8 ; with alignment of 16 and without
      9 
     10 ; Structs with size < 8 (an i32 followed by an i16: 6 bytes of fields)
     11 %struct.s38 = type { i32, i16 }
     12 ; With alignment of 16, the size will be padded to a multiple of 16 bytes:
        ; the [10 x i8] tail brings the 6 bytes of fields up to 16.
     13 %struct.s39 = type { i32, i16, [10 x i8] }
     14 ; Structs with size < 16. s40 and s41 have identical field lists; the globals
        ; of type s41 are the ones declared with align 16.
     15 %struct.s40 = type { i32, i16, i32, i16 }
     16 %struct.s41 = type { i32, i16, i32, i16 }
     17 ; Structs with size > 16. s43 is s42 plus a [10 x i8] tail (32 bytes total,
        ; matching the 32-byte memcpy used for it further down).
     18 %struct.s42 = type { i32, i16, i32, i16, i32, i16 }
     19 %struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
     20 
     21 @g38 = common global %struct.s38 zeroinitializer, align 4
        ; Each gNN / gNN_2 pair supplies the two struct arguments used by the
        ; callerNN tests. The s39, s41 and s43 globals are 16-byte aligned; the
        ; others are 4-byte aligned.
     22 @g38_2 = common global %struct.s38 zeroinitializer, align 4
     23 @g39 = common global %struct.s39 zeroinitializer, align 16
     24 @g39_2 = common global %struct.s39 zeroinitializer, align 16
     25 @g40 = common global %struct.s40 zeroinitializer, align 4
     26 @g40_2 = common global %struct.s40 zeroinitializer, align 4
     27 @g41 = common global %struct.s41 zeroinitializer, align 16
     28 @g41_2 = common global %struct.s41 zeroinitializer, align 16
     29 @g42 = common global %struct.s42 zeroinitializer, align 4
     30 @g42_2 = common global %struct.s42 zeroinitializer, align 4
     31 @g43 = common global %struct.s43 zeroinitializer, align 16
     32 @g43_2 = common global %struct.s43 zeroinitializer, align 16
     33 
     34 ; structs with size < 8 bytes, passed via i64 in x1 and x2
     35 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
     36 entry:
     37 ; CHECK: f38
     38 ; CHECK: add w[[A:[0-9]+]], w1, w0
     39 ; CHECK: add {{w[0-9]+}}, w[[A]], w2
        ; Each coerced i64 holds the i32 field in bits 0-31 (recovered by trunc) and
        ; the i16 field in bits 32-47 (brought down by lshr 32).
     40   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
     41   %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
     42   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
     43   %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
        ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field to i32.
     44   %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
     45   %sext = trunc i64 %sext8 to i32
     46   %conv = ashr exact i32 %sext, 16
     47   %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
     48   %sext10 = trunc i64 %sext1011 to i32
     49   %conv6 = ashr exact i32 %sext10, 16
        ; Return %i plus both i32 fields plus both sign-extended i16 fields.
     50   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
     51   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
     52   %add4 = add i32 %add3, %conv
     53   %add7 = add i32 %add4, %conv6
     54   ret i32 %add7
     55 }
     56 
     57 define i32 @caller38() #1 {
     58 entry:
     59 ; CHECK: caller38
     60 ; CHECK: ldr x1,
     61 ; CHECK: ldr x2,
        ; Each s38 global is loaded as a single i64 and passed after the constant 3;
        ; the expectations above require the two loads to land directly in x1 and x2.
     62   %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
     63   %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
     64   %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
     65   ret i32 %call
     66 }
     67 
     68 declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
     69                 i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
     70 
     71 ; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
     72 ; i9 at [sp]
        ; The first eight i32 arguments take the argument registers, so the ninth
        ; i32 and both coerced structs are stack-passed.
     73 define i32 @caller38_stack() #1 {
     74 entry:
     75 ; CHECK: caller38_stack
     76 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
     77 ; CHECK: movz w[[C:[0-9]+]], #0x9
     78 ; CHECK: str w[[C]], [sp]
     79   %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
     80   %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
     81   %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
     82                                    i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
     83   ret i32 %call
     84 }
     85 
     86 ; structs with size < 8 bytes, alignment of 16
     87 ; passed via i128; the low halves arrive in x1 and x3
     88 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
     89 entry:
     90 ; CHECK: f39
     91 ; CHECK: add w[[A:[0-9]+]], w1, w0
     92 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
        ; Same field extraction as f38, but on i128 values: the i32 field is the low
        ; 32 bits and the i16 field is bits 32-47.
     93   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
     94   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
     95   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
     96   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
        ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field to i32.
     97   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
     98   %sext = trunc i128 %sext8 to i32
     99   %conv = ashr exact i32 %sext, 16
    100   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
    101   %sext10 = trunc i128 %sext1011 to i32
    102   %conv6 = ashr exact i32 %sext10, 16
    103   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    104   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    105   %add4 = add i32 %add3, %conv
    106   %add7 = add i32 %add4, %conv6
    107   ret i32 %add7
    108 }
    109 
    110 define i32 @caller39() #1 {
    111 entry:
    112 ; CHECK: caller39
    113 ; CHECK: ldp x1, x2,
    114 ; CHECK: ldp x3, x4,
        ; Each 16-byte-aligned s39 global is loaded as one i128; the expectations
        ; above require each value to be loaded as a register pair (x1/x2, x3/x4).
    115   %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
    116   %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
    117   %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
    118   ret i32 %call
    119 }
    120 
    121 declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    122                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
    123 
    124 ; structs with size < 8 bytes, alignment 16
    125 ; passed on stack at [sp+16] and [sp+32]
        ; The ninth i32 is stored at [sp]; each i128 is written as a pair of x
        ; registers at a 16-byte-aligned stack offset.
    126 define i32 @caller39_stack() #1 {
    127 entry:
    128 ; CHECK: caller39_stack
    129 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
    130 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
    131 ; CHECK: movz w[[C:[0-9]+]], #0x9
    132 ; CHECK: str w[[C]], [sp]
    133   %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
    134   %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
    135   %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    136                                    i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
    137   ret i32 %call
    138 }
    139 
    140 ; structs with size < 16 bytes
    141 ; passed via [2 x i64]; element 0 of each array arrives in x1 and x3
    142 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
    143 entry:
    144 ; CHECK: f40
    145 ; CHECK: add w[[A:[0-9]+]], w1, w0
    146 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
        ; Only element 0 of each [2 x i64] is read: its low 32 bits are the first
        ; i32 field and bits 32-47 are the first i16 field.
    147   %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
    148   %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
    149   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
    150   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
        ; lshr 32 / shl 16 / trunc / ashr 16 sign-extends the 16-bit field to i32.
    151   %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
    152   %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
    153   %sext = trunc i64 %sext8 to i32
    154   %conv = ashr exact i32 %sext, 16
    155   %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
    156   %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
    157   %sext10 = trunc i64 %sext1011 to i32
    158   %conv6 = ashr exact i32 %sext10, 16
    159   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    160   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    161   %add4 = add i32 %add3, %conv
    162   %add7 = add i32 %add4, %conv6
    163   ret i32 %add7
    164 }
    165 
    166 define i32 @caller40() #1 {
    167 entry:
    168 ; CHECK: caller40
    169 ; CHECK: ldp x1, x2,
    170 ; CHECK: ldp x3, x4,
        ; Each 4-byte-aligned s40 global is loaded as a [2 x i64]; the expectations
        ; above require each array to be loaded as a register pair (x1/x2, x3/x4).
    171   %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
    172   %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
    173   %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
    174   ret i32 %call
    175 }
    176 
    177 declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    178                 i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
    179 
    180 ; structs with size < 16 bytes
    181 ; passed on stack at [sp+8] and [sp+24]
        ; The ninth i32 is stored at [sp]; unlike the i128 cases, the [2 x i64]
        ; values are expected at 8-byte-aligned offsets.
    182 define i32 @caller40_stack() #1 {
    183 entry:
    184 ; CHECK: caller40_stack
    185 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
    186 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
    187 ; CHECK: movz w[[C:[0-9]+]], #0x9
    188 ; CHECK: str w[[C]], [sp]
    189   %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
    190   %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
    191   %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    192                          i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
    193   ret i32 %call
    194 }
    195 
    196 ; structs with size < 16 bytes, alignment of 16
    197 ; passed via i128; the low halves arrive in x1 and x3
    198 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
    199 entry:
    200 ; CHECK: f41
    201 ; CHECK: add w[[A:[0-9]+]], w1, w0
    202 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
        ; Only the first i32 field (bits 0-31) and the first i16 field (bits 32-47)
        ; of each i128 are used.
    203   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
    204   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
    205   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
    206   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
        ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit field to i32.
    207   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
    208   %sext = trunc i128 %sext8 to i32
    209   %conv = ashr exact i32 %sext, 16
    210   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
    211   %sext10 = trunc i128 %sext1011 to i32
    212   %conv6 = ashr exact i32 %sext10, 16
    213   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    214   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    215   %add4 = add i32 %add3, %conv
    216   %add7 = add i32 %add4, %conv6
    217   ret i32 %add7
    218 }
    219 
    220 define i32 @caller41() #1 {
    221 entry:
    222 ; CHECK: caller41
    223 ; CHECK: ldp x1, x2,
    224 ; CHECK: ldp x3, x4,
        ; Each 16-byte-aligned s41 global is loaded as one i128; the expectations
        ; above require each value to be loaded as a register pair (x1/x2, x3/x4).
    225   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
    226   %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
    227   %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
    228   ret i32 %call
    229 }
    230 
    231 declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    232                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
    233 
    234 ; structs with size < 16 bytes, alignment of 16
    235 ; passed on stack at [sp+16] and [sp+32]
        ; The ninth i32 is stored at [sp]; each i128 is written as a pair of x
        ; registers at a 16-byte-aligned stack offset.
    236 define i32 @caller41_stack() #1 {
    237 entry:
    238 ; CHECK: caller41_stack
    239 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
    240 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
    241 ; CHECK: movz w[[C:[0-9]+]], #0x9
    242 ; CHECK: str w[[C]], [sp]
    243   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
    244   %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
    245   %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    246                             i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
    247   ret i32 %call
    248 }
    249 
    250 ; structs with size of 22 bytes, passed indirectly in x1 and x2
        ; (the callee receives pointers to the structs, not the struct contents)
    251 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
    252 entry:
    253 ; CHECK: f42
    254 ; CHECK: ldr w[[A:[0-9]+]], [x1]
    255 ; CHECK: ldr w[[B:[0-9]+]], [x2]
    256 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
    257 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
    258 ; FAST: f42
    259 ; FAST: ldr w[[A:[0-9]+]], [x1]
    260 ; FAST: ldr w[[B:[0-9]+]], [x2]
    261 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
    262 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
        ; Load field 0 (i32) through each pointer.
    263   %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
    264   %0 = load i32* %i1, align 4, !tbaa !0
    265   %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
    266   %1 = load i32* %i2, align 4, !tbaa !0
        ; Load field 1 (i16) through each pointer and sign-extend it to i32.
    267   %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
    268   %2 = load i16* %s, align 2, !tbaa !3
    269   %conv = sext i16 %2 to i32
    270   %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
    271   %3 = load i16* %s5, align 2, !tbaa !3
    272   %conv6 = sext i16 %3 to i32
        ; Return %i plus the four loaded values.
    273   %add = add i32 %0, %i
    274   %add3 = add i32 %add, %1
    275   %add4 = add i32 %add3, %conv
    276   %add7 = add i32 %add4, %conv6
    277   ret i32 %add7
    278 }
    279 
    280 ; For s1, we allocate stack space for the 22-byte struct (copied below with a
        ; 24-byte memcpy) and pass its address via x1; s2 is handled the same way
        ; with its address in x2.
    281 define i32 @caller42() #3 {
    282 entry:
    283 ; CHECK: caller42
    284 ; CHECK: str {{x[0-9]+}}, [sp, #48]
    285 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    286 ; CHECK: str {{x[0-9]+}}, [sp, #16]
    287 ; CHECK: str {{q[0-9]+}}, [sp]
    288 ; CHECK: add x1, sp, #32
    289 ; CHECK: mov x2, sp
    290 ; Space for s1 is allocated at sp+32
    291 ; Space for s2 is allocated at sp
    292 
    293 ; FAST: caller42
    294 ; FAST: sub sp, sp, #96
    295 ; Space for s1 is allocated at fp-24 = sp+72
    296 ; Space for s2 is allocated at sp+48
    297 ; FAST: sub x[[A:[0-9]+]], x29, #24
    298 ; FAST: add x[[A:[0-9]+]], sp, #48
    299 ; Call memcpy with size = 24 (0x18)
    300 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
        ; Copy each global into its own stack temporary, then pass the temporaries'
        ; addresses to f42.
    301   %tmp = alloca %struct.s42, align 4
    302   %tmp1 = alloca %struct.s42, align 4
    303   %0 = bitcast %struct.s42* %tmp to i8*
    304   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    305   %1 = bitcast %struct.s42* %tmp1 to i8*
    306   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    307   %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
    308   ret i32 %call
    309 }
    310 
    311 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 ; used by the caller42*/caller43* tests to copy globals into stack temporaries
    312 
    313 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    314                        i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
    315                        %struct.s42* nocapture %s2) #2
    316 
        ; Nine leading i32 arguments precede the two struct pointers, so the ninth
        ; i32 and both pointers are expected on the stack at [sp], [sp+8] and
        ; [sp+16].
    317 define i32 @caller42_stack() #3 {
    318 entry:
    319 ; CHECK: caller42_stack
    320 ; CHECK: mov x29, sp
    321 ; CHECK: sub sp, sp, #96
    322 ; CHECK: stur {{x[0-9]+}}, [x29, #-16]
    323 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
    324 ; CHECK: str {{x[0-9]+}}, [sp, #48]
    325 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    326 ; Space for s1 is allocated at x29-32 = sp+64
    327 ; Space for s2 is allocated at sp+32
    328 ; CHECK: add x[[B:[0-9]+]], sp, #32
    329 ; CHECK: str x[[B]], [sp, #16]
    330 ; CHECK: sub x[[A:[0-9]+]], x29, #32
    331 ; Address of s1 is passed on stack at sp+8
    332 ; CHECK: str x[[A]], [sp, #8]
    333 ; CHECK: movz w[[C:[0-9]+]], #0x9
    334 ; CHECK: str w[[C]], [sp]
    335 
    336 ; FAST: caller42_stack
    337 ; Space for s1 is allocated at fp-24
    338 ; Space for s2 is allocated at fp-48
    339 ; FAST: sub x[[A:[0-9]+]], x29, #24
    340 ; FAST: sub x[[B:[0-9]+]], x29, #48
    341 ; Call memcpy with size = 24 (0x18)
    342 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
    343 ; FAST: str {{w[0-9]+}}, [sp]
    344 ; Address of s1 is passed on stack at sp+8
    345 ; FAST: str {{x[0-9]+}}, [sp, #8]
    346 ; FAST: str {{x[0-9]+}}, [sp, #16]
        ; Copy each global into its own stack temporary, then pass the temporaries'
        ; addresses as the last two arguments.
    347   %tmp = alloca %struct.s42, align 4
    348   %tmp1 = alloca %struct.s42, align 4
    349   %0 = bitcast %struct.s42* %tmp to i8*
    350   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    351   %1 = bitcast %struct.s42* %tmp1 to i8*
    352   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    353   %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    354                        i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
    355   ret i32 %call
    356 }
    357 
    358 ; structs with size of 22 bytes, alignment of 16
    359 ; passed indirectly in x1 and x2 (the callee receives pointers to the structs)
    360 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
    361 entry:
    362 ; CHECK: f43
    363 ; CHECK: ldr w[[A:[0-9]+]], [x1]
    364 ; CHECK: ldr w[[B:[0-9]+]], [x2]
    365 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
    366 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
    367 ; FAST: f43
    368 ; FAST: ldr w[[A:[0-9]+]], [x1]
    369 ; FAST: ldr w[[B:[0-9]+]], [x2]
    370 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
    371 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
        ; Load field 0 (i32) through each pointer.
    372   %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
    373   %0 = load i32* %i1, align 4, !tbaa !0
    374   %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
    375   %1 = load i32* %i2, align 4, !tbaa !0
        ; Load field 1 (i16) through each pointer and sign-extend it to i32.
    376   %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
    377   %2 = load i16* %s, align 2, !tbaa !3
    378   %conv = sext i16 %2 to i32
    379   %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
    380   %3 = load i16* %s5, align 2, !tbaa !3
    381   %conv6 = sext i16 %3 to i32
        ; Return %i plus the four loaded values.
    382   %add = add i32 %0, %i
    383   %add3 = add i32 %add, %1
    384   %add4 = add i32 %add3, %conv
    385   %add7 = add i32 %add4, %conv6
    386   ret i32 %add7
    387 }
    388 
    389 define i32 @caller43() #3 {
    390 entry:
    391 ; CHECK: caller43
    392 ; CHECK: str {{q[0-9]+}}, [sp, #48]
    393 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    394 ; CHECK: str {{q[0-9]+}}, [sp, #16]
    395 ; CHECK: str {{q[0-9]+}}, [sp]
    396 ; CHECK: add x1, sp, #32
    397 ; CHECK: mov x2, sp
    398 ; Space for s1 is allocated at sp+32
    399 ; Space for s2 is allocated at sp
    400 
    401 ; FAST: caller43
    402 ; FAST: mov x29, sp
    403 ; Space for s1 is allocated at sp+32
    404 ; Space for s2 is allocated at sp
    405 ; FAST: add x1, sp, #32
    406 ; FAST: mov x2, sp
    407 ; FAST: str {{x[0-9]+}}, [sp, #32]
    408 ; FAST: str {{x[0-9]+}}, [sp, #40]
    409 ; FAST: str {{x[0-9]+}}, [sp, #48]
    410 ; FAST: str {{x[0-9]+}}, [sp, #56]
    411 ; FAST: str {{x[0-9]+}}, [sp]
    412 ; FAST: str {{x[0-9]+}}, [sp, #8]
    413 ; FAST: str {{x[0-9]+}}, [sp, #16]
    414 ; FAST: str {{x[0-9]+}}, [sp, #24]
        ; Copy each 32-byte, 16-byte-aligned global into its own stack temporary
        ; and pass the temporaries' addresses to f43. Both runs expect the copies
        ; as inline stores: two q-register stores per struct by default, four
        ; x-register stores per struct at -O0.
    415   %tmp = alloca %struct.s43, align 16
    416   %tmp1 = alloca %struct.s43, align 16
    417   %0 = bitcast %struct.s43* %tmp to i8*
    418   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    419   %1 = bitcast %struct.s43* %tmp1 to i8*
    420   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    421   %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
    422   ret i32 %call
    423 }
    424 
    425 declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    426                        i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
    427                        %struct.s43* nocapture %s2) #2
    428 
        ; Nine leading i32 arguments precede the two struct pointers, so the ninth
        ; i32 and both pointers are expected on the stack at [sp], [sp+8] and
        ; [sp+16].
    429 define i32 @caller43_stack() #3 {
    430 entry:
    431 ; CHECK: caller43_stack
    432 ; CHECK: mov x29, sp
    433 ; CHECK: sub sp, sp, #96
    434 ; CHECK: stur {{q[0-9]+}}, [x29, #-16]
    435 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
    436 ; CHECK: str {{q[0-9]+}}, [sp, #48]
    437 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    438 ; Space for s1 is allocated at x29-32 = sp+64
    439 ; Space for s2 is allocated at sp+32
    440 ; CHECK: add x[[B:[0-9]+]], sp, #32
    441 ; CHECK: str x[[B]], [sp, #16]
    442 ; CHECK: sub x[[A:[0-9]+]], x29, #32
    443 ; Address of s1 is passed on stack at sp+8
    444 ; CHECK: str x[[A]], [sp, #8]
    445 ; CHECK: movz w[[C:[0-9]+]], #0x9
    446 ; CHECK: str w[[C]], [sp]
    447 
    448 ; FAST: caller43_stack
    449 ; FAST: sub sp, sp, #96
    450 ; Space for s1 is allocated at fp-32 = sp+64
    451 ; Space for s2 is allocated at sp+32
    452 ; FAST: sub x[[A:[0-9]+]], x29, #32
    453 ; FAST: add x[[B:[0-9]+]], sp, #32
    454 ; FAST: stur {{x[0-9]+}}, [x29, #-32]
    455 ; FAST: stur {{x[0-9]+}}, [x29, #-24]
    456 ; FAST: stur {{x[0-9]+}}, [x29, #-16]
    457 ; FAST: stur {{x[0-9]+}}, [x29, #-8]
    458 ; FAST: str {{x[0-9]+}}, [sp, #32]
    459 ; FAST: str {{x[0-9]+}}, [sp, #40]
    460 ; FAST: str {{x[0-9]+}}, [sp, #48]
    461 ; FAST: str {{x[0-9]+}}, [sp, #56]
    462 ; FAST: str {{w[0-9]+}}, [sp]
    463 ; Address of s1 is passed on stack at sp+8
    464 ; FAST: str {{x[0-9]+}}, [sp, #8]
    465 ; FAST: str {{x[0-9]+}}, [sp, #16]
        ; Copy each 32-byte global into its own 16-byte-aligned stack temporary,
        ; then pass the temporaries' addresses as the last two arguments.
    466   %tmp = alloca %struct.s43, align 16
    467   %tmp1 = alloca %struct.s43, align 16
    468   %0 = bitcast %struct.s43* %tmp to i8*
    469   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    470   %1 = bitcast %struct.s43* %tmp1 to i8*
    471   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    472   %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    473                        i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
    474   ret i32 %call
    475 }
    476 
    477 ; rdar://13668927
    478 ; Check that we don't split an i128.
        ; Seven i32 arguments precede the i128, so only one argument register is
        ; left when it is reached; the whole i128 must go to the stack
        ; (presumably rather than being divided between x7 and the stack).
    479 declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
    480                                i32 %i6, i32 %i7, i128 %s1, i32 %i8)
    481 
    482 define i32 @i128_split() {
    483 entry:
    484 ; CHECK: i128_split
    485 ; "i128 %0" should be on stack at [sp].
    486 ; "i32 8" should be on stack at [sp, #16].
    487 ; CHECK: str {{w[0-9]+}}, [sp, #16]
    488 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
    489 ; FAST: i128_split
    490 ; FAST: sub sp, sp, #48
    491 ; FAST: mov x[[ADDR:[0-9]+]], sp
    492 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
    493 ; Load/Store opt is disabled with -O0, so the i128 is stored as two separate
        ; 64-bit stores instead of one stp.
    494 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
    495 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
    496   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
    497   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
    498                                            i32 6, i32 7, i128 %0, i32 8) #5
    499   ret i32 %call
    500 }
    501 
    502 declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
    503                                i32 %i6, i32 %i7, i64 %s1, i32 %i8)
    504 
        ; Counterpart to i128_split: with the same seven leading i32 arguments, an
        ; i64 still fits in the last argument register and only the trailing i32
        ; goes to the stack.
    505 define i32 @i64_split() {
    506 entry:
    507 ; CHECK: i64_split
    508 ; "i64 %0" should be in register x7.
    509 ; "i32 8" should be on stack at [sp].
    510 ; CHECK: ldr x7, [{{x[0-9]+}}]
    511 ; CHECK: str {{w[0-9]+}}, [sp]
    512 ; FAST: i64_split
    513 ; FAST: ldr x7, [{{x[0-9]+}}]
    514 ; FAST: str {{w[0-9]+}}, [sp]
        ; The i64 is read from the first 8 bytes of @g41.
    515   %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
    516   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
    517                                     i32 6, i32 7, i64 %0, i32 8) #5
    518   ret i32 %call
    519 }
    520 
    521 attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
        ; Attribute group usage in this file:
        ;   #0 - f38..f41 and the f38_stack..f41_stack declarations (by-value structs)
        ;   #1 - caller38..caller41 and their _stack variants
        ;   #2 - f42, f43 and the f42_stack / f43_stack declarations (by-pointer structs)
        ;   #3 - caller42, caller43 and their _stack variants
        ;   #4 - the llvm.memcpy declaration
        ;   #5 - call sites of the test callees (not the memcpy calls)
    522 attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
    523 attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
    524 attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
    525 attributes #4 = { nounwind }
    526 attributes #5 = { nobuiltin }
    527 
        ; TBAA nodes: !0 = "int", !3 = "short", both under !1 ("omnipotent char"),
        ; whose parent !2 is the root. !4 is the tbaa.struct operand attached to the
        ; memcpy calls; it lists (offset, size, tag) entries for three i32/i16
        ; pairs at offsets 0/4, 8/12 and 16/20.
    528 !0 = metadata !{metadata !"int", metadata !1}
    529 !1 = metadata !{metadata !"omnipotent char", metadata !2}
    530 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
    531 !3 = metadata !{metadata !"short", metadata !1}
    532 !4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
    533