Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
      2 ; RUN: llc < %s -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
      3 target triple = "arm64-apple-darwin"
      4 
      5 ; rdar://12648441
      6 ; Generated from arm64-arguments.c with -O2.
      7 ; Test passing structs with size < 8, < 16 and > 16
      8 ; with alignment of 16 and without
      9 
     10 ; Structs with size < 8
     11 %struct.s38 = type { i32, i16 }
     12 ; With alignment of 16, the size will be padded to a multiple of 16 bytes.
     13 %struct.s39 = type { i32, i16, [10 x i8] }
     14 ; Structs with size < 16
        ; s40 and s41 have the same members; their globals below differ only in
        ; alignment (4 for g40*, 16 for g41*).
     15 %struct.s40 = type { i32, i16, i32, i16 }
     16 %struct.s41 = type { i32, i16, i32, i16 }
     17 ; Structs with size > 16
     18 %struct.s42 = type { i32, i16, i32, i16, i32, i16 }
     19 %struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
     20 
        ; Two zero-initialized common globals per struct type; the caller
        ; functions below read them to build the struct arguments.
     21 @g38 = common global %struct.s38 zeroinitializer, align 4
     22 @g38_2 = common global %struct.s38 zeroinitializer, align 4
     23 @g39 = common global %struct.s39 zeroinitializer, align 16
     24 @g39_2 = common global %struct.s39 zeroinitializer, align 16
     25 @g40 = common global %struct.s40 zeroinitializer, align 4
     26 @g40_2 = common global %struct.s40 zeroinitializer, align 4
     27 @g41 = common global %struct.s41 zeroinitializer, align 16
     28 @g41_2 = common global %struct.s41 zeroinitializer, align 16
     29 @g42 = common global %struct.s42 zeroinitializer, align 4
     30 @g42_2 = common global %struct.s42 zeroinitializer, align 4
     31 @g43 = common global %struct.s43 zeroinitializer, align 16
     32 @g43_2 = common global %struct.s43 zeroinitializer, align 16
     33 
     34 ; structs with size < 8 bytes, passed via i64 in x1 and x2
        ; Each struct arrives coerced to a single i64. The function returns
        ; %i plus the low 32 bits of both i64 values plus bits 32-47 of both
        ; i64 values sign-extended to i32. The expectations only pin the first
        ; two adds, which must read w0/w1 and then w2.
     35 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
     36 entry:
     37 ; CHECK-LABEL: f38
     38 ; CHECK: add w[[A:[0-9]+]], w1, w0
     39 ; CHECK: add {{w[0-9]+}}, w[[A]], w2
     40   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
     41   %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
     42   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
     43   %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
        ; shl 16 / trunc / ashr 16 sign-extends the 16-bit value that sat in
        ; bits 32-47 of the original i64.
     44   %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
     45   %sext = trunc i64 %sext8 to i32
     46   %conv = ashr exact i32 %sext, 16
     47   %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
     48   %sext10 = trunc i64 %sext1011 to i32
     49   %conv6 = ashr exact i32 %sext10, 16
     50   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
     51   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
     52   %add4 = add i32 %add3, %conv
     53   %add7 = add i32 %add4, %conv6
     54   ret i32 %add7
     55 }
     56 
     57 define i32 @caller38() #1 {
        ; Loads @g38 and @g38_2 as i64 values and tail-calls @f38 with 3 as the
        ; leading i32 argument. The two loads are expected to land directly in
        ; the argument registers x1 and x2.
     58 entry:
     59 ; CHECK-LABEL: caller38
     60 ; CHECK: ldr x1,
     61 ; CHECK: ldr x2,
     62   %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
     63   %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
     64   %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
     65   ret i32 %call
     66 }
     67 
     68 declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
     69                 i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
     70 
     71 ; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
     72 ; i9 at [sp]
        ; Nine i32 arguments precede the two coerced structs, so the ninth i32
        ; and both i64 values are expected in memory: the constant 9 is stored
        ; at [sp] and the two i64 values are stored as one pair at [sp, #8].
     73 define i32 @caller38_stack() #1 {
     74 entry:
     75 ; CHECK-LABEL: caller38_stack
     76 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
     77 ; CHECK: mov w[[C:[0-9]+]], #9
     78 ; CHECK: str w[[C]], [sp]
     79   %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
     80   %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
     81   %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
     82                                    i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
     83   ret i32 %call
     84 }
     85 
     86 ; structs with size < 8 bytes, alignment of 16
     87 ; passed via i128 in x1 and x3
        ; Same computation as @f38, but each struct is coerced to an i128.
        ; The result is %i plus the low 32 bits of both i128 values plus bits
        ; 32-47 of both values sign-extended to i32. The expectations require
        ; the low words to be read from w1 and w3.
     88 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
     89 entry:
     90 ; CHECK-LABEL: f39
     91 ; CHECK: add w[[A:[0-9]+]], w1, w0
     92 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
     93   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
     94   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
     95   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
     96   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
        ; shl 16 / trunc / ashr 16 sign-extends the 16-bit value that sat in
        ; bits 32-47 of the original i128.
     97   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
     98   %sext = trunc i128 %sext8 to i32
     99   %conv = ashr exact i32 %sext, 16
    100   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
    101   %sext10 = trunc i128 %sext1011 to i32
    102   %conv6 = ashr exact i32 %sext10, 16
    103   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    104   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    105   %add4 = add i32 %add3, %conv
    106   %add7 = add i32 %add4, %conv6
    107   ret i32 %add7
    108 }
    109 
    110 define i32 @caller39() #1 {
        ; Loads @g39 and @g39_2 as i128 values and tail-calls @f39 with 3 as the
        ; leading i32 argument. Each i128 is expected to be loaded as a register
        ; pair: x1/x2 for the first and x3/x4 for the second.
    111 entry:
    112 ; CHECK-LABEL: caller39
    113 ; CHECK: ldp x1, x2,
    114 ; CHECK: ldp x3, x4,
    115   %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
    116   %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
    117   %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
    118   ret i32 %call
    119 }
    120 
    121 declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    122                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
    123 
    124 ; structs with size < 8 bytes, alignment 16
    125 ; passed on stack at [sp+16] and [sp+32]
        ; The ninth i32 (constant 9) is expected at [sp]; each i128 is expected
        ; to be stored as a register pair at a 16-byte multiple, so the first
        ; struct starts at [sp, #16] rather than [sp, #8].
    126 define i32 @caller39_stack() #1 {
    127 entry:
    128 ; CHECK-LABEL: caller39_stack
    129 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
    130 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
    131 ; CHECK: mov w[[C:[0-9]+]], #9
    132 ; CHECK: str w[[C]], [sp]
    133   %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
    134   %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
    135   %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    136                                    i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
    137   ret i32 %call
    138 }
    139 
    140 ; structs with size < 16 bytes
    141 ; passed via [2 x i64] in x1 and x3
        ; Only element 0 of each [2 x i64] is read. The result is %i plus the
        ; low 32 bits of both first elements plus bits 32-47 of both first
        ; elements sign-extended to i32. The expectations require the low
        ; words to be read from w1 and w3.
    142 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
    143 entry:
    144 ; CHECK-LABEL: f40
    145 ; CHECK: add w[[A:[0-9]+]], w1, w0
    146 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
    147   %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
    148   %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
    149   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
    150   %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
        ; lshr 32 / shl 16 / trunc / ashr 16 sign-extends the 16-bit value
        ; held in bits 32-47 of the first array element.
    151   %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
    152   %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
    153   %sext = trunc i64 %sext8 to i32
    154   %conv = ashr exact i32 %sext, 16
    155   %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
    156   %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
    157   %sext10 = trunc i64 %sext1011 to i32
    158   %conv6 = ashr exact i32 %sext10, 16
    159   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    160   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    161   %add4 = add i32 %add3, %conv
    162   %add7 = add i32 %add4, %conv6
    163   ret i32 %add7
    164 }
    165 
    166 define i32 @caller40() #1 {
        ; Loads @g40 and @g40_2 as [2 x i64] values (align 4) and tail-calls
        ; @f40 with 3 as the leading i32 argument. Each array is expected to be
        ; loaded as a register pair: x1/x2 and x3/x4.
    167 entry:
    168 ; CHECK-LABEL: caller40
    169 ; CHECK: ldp x1, x2,
    170 ; CHECK: ldp x3, x4,
    171   %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
    172   %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
    173   %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
    174   ret i32 %call
    175 }
    176 
    177 declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    178                 i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
    179 
    180 ; structs with size < 16 bytes
    181 ; passed on stack at [sp+8] and [sp+24]
        ; Unlike the i128 variants, the [2 x i64] arguments are expected at
        ; 8-byte offsets: directly after the ninth i32 stored at [sp].
    182 define i32 @caller40_stack() #1 {
    183 entry:
    184 ; CHECK-LABEL: caller40_stack
    185 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
    186 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
    187 ; CHECK: mov w[[C:[0-9]+]], #9
    188 ; CHECK: str w[[C]], [sp]
    189   %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
    190   %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
    191   %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    192                          i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
    193   ret i32 %call
    194 }
    195 
    196 ; structs with size < 16 bytes, alignment of 16
    197 ; passed via i128 in x1 and x3
        ; Each struct is coerced to an i128. The result is %i plus the low 32
        ; bits of both i128 values plus bits 32-47 of both values sign-extended
        ; to i32; the upper 64 bits of each argument are not read. The
        ; expectations require the low words to be read from w1 and w3.
    198 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
    199 entry:
    200 ; CHECK-LABEL: f41
    201 ; CHECK: add w[[A:[0-9]+]], w1, w0
    202 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
    203   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
    204   %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
    205   %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
    206   %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
        ; shl 16 / trunc / ashr 16 sign-extends the 16-bit value that sat in
        ; bits 32-47 of the original i128.
    207   %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
    208   %sext = trunc i128 %sext8 to i32
    209   %conv = ashr exact i32 %sext, 16
    210   %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
    211   %sext10 = trunc i128 %sext1011 to i32
    212   %conv6 = ashr exact i32 %sext10, 16
    213   %add = add i32 %s1.sroa.0.0.extract.trunc, %i
    214   %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
    215   %add4 = add i32 %add3, %conv
    216   %add7 = add i32 %add4, %conv6
    217   ret i32 %add7
    218 }
    219 
    220 define i32 @caller41() #1 {
        ; Loads @g41 and @g41_2 as i128 values (align 16) and tail-calls @f41
        ; with 3 as the leading i32 argument. Each i128 is expected to be loaded
        ; as a register pair: x1/x2 and x3/x4.
    221 entry:
    222 ; CHECK-LABEL: caller41
    223 ; CHECK: ldp x1, x2,
    224 ; CHECK: ldp x3, x4,
    225   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
    226   %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
    227   %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
    228   ret i32 %call
    229 }
    230 
    231 declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    232                 i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
    233 
    234 ; structs with size < 16 bytes, alignment of 16
    235 ; passed on stack at [sp+16] and [sp+32]
        ; The ninth i32 (constant 9) is expected at [sp]; each i128 is expected
        ; to be stored as a register pair at a 16-byte multiple.
    236 define i32 @caller41_stack() #1 {
    237 entry:
    238 ; CHECK-LABEL: caller41_stack
    239 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
    240 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
    241 ; CHECK: mov w[[C:[0-9]+]], #9
    242 ; CHECK: str w[[C]], [sp]
    243   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
    244   %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
    245   %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
    246                             i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
    247   ret i32 %call
    248 }
    249 
    250 ; structs with size of 22 bytes, passed indirectly in x1 and x2
        ; Both structs are received as pointers. The function loads member 0
        ; (i32) and member 1 (i16, sign-extended) through each pointer and
        ; returns %i plus those four values. Both the optimized run and the -O0
        ; run expect the two i32 loads to go through x1 and x2.
    251 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
    252 entry:
    253 ; CHECK-LABEL: f42
    254 ; CHECK: ldr w[[A:[0-9]+]], [x1]
    255 ; CHECK: ldr w[[B:[0-9]+]], [x2]
    256 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
    257 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
        ; The -O0 anchor below is a label directive, like every other function
        ; anchor in this file, so the following -O0 matches cannot drift into a
        ; neighbouring function.
    258 ; FAST-LABEL: f42
    259 ; FAST: ldr w[[A:[0-9]+]], [x1]
    260 ; FAST: ldr w[[B:[0-9]+]], [x2]
    261 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
    262 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
    263   %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
    264   %0 = load i32, i32* %i1, align 4, !tbaa !0
    265   %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
    266   %1 = load i32, i32* %i2, align 4, !tbaa !0
    267   %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
    268   %2 = load i16, i16* %s, align 2, !tbaa !3
    269   %conv = sext i16 %2 to i32
    270   %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
    271   %3 = load i16, i16* %s5, align 2, !tbaa !3
    272   %conv6 = sext i16 %3 to i32
    273   %add = add i32 %0, %i
    274   %add3 = add i32 %add, %1
    275   %add4 = add i32 %add3, %conv
    276   %add7 = add i32 %add4, %conv6
    277   ret i32 %add7
    278 }
    279 
    280 ; For s1, we allocate a 24-byte stack temporary and pass its address via x1
        ; s2 is handled the same way and its address is passed via x2. Each
        ; temporary is filled by a 24-byte memcpy from @g42 / @g42_2 before the
        ; call to @f42 with 3 as the leading i32 argument.
    281 define i32 @caller42() #3 {
    282 entry:
    283 ; CHECK-LABEL: caller42
    284 ; CHECK: str {{x[0-9]+}}, [sp, #48]
    285 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    286 ; CHECK: str {{x[0-9]+}}, [sp, #16]
    287 ; CHECK: str {{q[0-9]+}}, [sp]
    288 ; CHECK: add x1, sp, #32
    289 ; CHECK: mov x2, sp
    290 ; Space for s1 is allocated at sp+32
    291 ; Space for s2 is allocated at sp
    292 
    293 ; FAST-LABEL: caller42
    294 ; FAST: sub sp, sp, #112
    295 ; Space for s1 is allocated at fp-24 = sp+72
    296 ; Space for s2 is allocated at sp+48
        ; The two address computations capture into distinct names (A for s1,
        ; B for s2), matching the naming used in caller42_stack.
    297 ; FAST: sub x[[A:[0-9]+]], x29, #24
    298 ; FAST: add x[[B:[0-9]+]], sp, #48
    299 ; Call memcpy with size = 24 (0x18)
    300 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
    301   %tmp = alloca %struct.s42, align 4
    302   %tmp1 = alloca %struct.s42, align 4
    303   %0 = bitcast %struct.s42* %tmp to i8*
    304   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    305   %1 = bitcast %struct.s42* %tmp1 to i8*
    306   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    307   %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
    308   ret i32 %call
    309 }
    310 
    311 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
    312 
    313 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    314                        i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
    315                        %struct.s42* nocapture %s2) #2
    316 
        ; Copies @g42 and @g42_2 (24 bytes each) into two stack temporaries and
        ; calls @f42_stack with nine leading i32 arguments, so the ninth i32 and
        ; both struct pointers are expected in memory: 9 at [sp], the address of
        ; s1 at [sp, #8] and the address of s2 at [sp, #16].
    317 define i32 @caller42_stack() #3 {
    318 entry:
    319 ; CHECK-LABEL: caller42_stack
    320 ; CHECK: sub sp, sp, #112
    321 ; CHECK: add x29, sp, #96
    322 ; CHECK: stur {{x[0-9]+}}, [x29, #-16]
    323 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
    324 ; CHECK: str {{x[0-9]+}}, [sp, #48]
    325 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    326 ; Space for s1 is allocated at x29-32 = sp+64
    327 ; Space for s2 is allocated at sp+32
    328 ; CHECK: add x[[B:[0-9]+]], sp, #32
    329 ; CHECK: str x[[B]], [sp, #16]
    330 ; CHECK: sub x[[A:[0-9]+]], x29, #32
    331 ; Address of s1 is passed on stack at sp+8
    332 ; CHECK: str x[[A]], [sp, #8]
    333 ; CHECK: mov w[[C:[0-9]+]], #9
    334 ; CHECK: str w[[C]], [sp]
    335 
    336 ; FAST-LABEL: caller42_stack
    337 ; Space for s1 is allocated at fp-24
    338 ; Space for s2 is allocated at fp-48
    339 ; FAST: sub x[[A:[0-9]+]], x29, #24
    340 ; FAST: sub x[[B:[0-9]+]], x29, #48
    341 ; Call memcpy with size = 24 (0x18)
    342 ; FAST: orr {{x[0-9]+}}, xzr, #0x18
    343 ; FAST: str {{w[0-9]+}}, [sp]
    344 ; Address of s1 is passed on stack at sp+8
    345 ; FAST: str {{x[0-9]+}}, [sp, #8]
    346 ; FAST: str {{x[0-9]+}}, [sp, #16]
    347   %tmp = alloca %struct.s42, align 4
    348   %tmp1 = alloca %struct.s42, align 4
    349   %0 = bitcast %struct.s42* %tmp to i8*
    350   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    351   %1 = bitcast %struct.s42* %tmp1 to i8*
    352   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
    353   %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    354                        i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
    355   ret i32 %call
    356 }
    357 
    358 ; structs with size of 22 bytes, alignment of 16
    359 ; passed indirectly in x1 and x2
        ; In this IR %struct.s43 carries an explicit [10 x i8] tail and the
        ; callers copy 32 bytes per struct. The function itself only loads
        ; member 0 (i32) and member 1 (i16, sign-extended) through each pointer
        ; and returns %i plus those four values.
    360 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
    361 entry:
    362 ; CHECK-LABEL: f43
    363 ; CHECK: ldr w[[A:[0-9]+]], [x1]
    364 ; CHECK: ldr w[[B:[0-9]+]], [x2]
    365 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
    366 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
    367 ; FAST-LABEL: f43
    368 ; FAST: ldr w[[A:[0-9]+]], [x1]
    369 ; FAST: ldr w[[B:[0-9]+]], [x2]
    370 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
    371 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
    372   %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
    373   %0 = load i32, i32* %i1, align 4, !tbaa !0
    374   %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
    375   %1 = load i32, i32* %i2, align 4, !tbaa !0
    376   %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
    377   %2 = load i16, i16* %s, align 2, !tbaa !3
    378   %conv = sext i16 %2 to i32
    379   %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
    380   %3 = load i16, i16* %s5, align 2, !tbaa !3
    381   %conv6 = sext i16 %3 to i32
    382   %add = add i32 %0, %i
    383   %add3 = add i32 %add, %1
    384   %add4 = add i32 %add3, %conv
    385   %add7 = add i32 %add4, %conv6
    386   ret i32 %add7
    387 }
    388 
    389 define i32 @caller43() #3 {
        ; Copies @g43 and @g43_2 (32 bytes each, 16-byte aligned) into two stack
        ; temporaries and calls @f43 with 3 as the leading i32 argument and the
        ; temporaries' addresses, which are expected in x1 and x2.
    390 entry:
    391 ; CHECK-LABEL: caller43
    392 ; CHECK: str {{q[0-9]+}}, [sp, #48]
    393 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    394 ; CHECK: str {{q[0-9]+}}, [sp, #16]
    395 ; CHECK: str {{q[0-9]+}}, [sp]
    396 ; CHECK: add x1, sp, #32
    397 ; CHECK: mov x2, sp
    398 ; Space for s1 is allocated at sp+32
    399 ; Space for s2 is allocated at sp
    400 
    401 ; FAST-LABEL: caller43
    402 ; FAST: add x29, sp, #64
    403 ; Space for s1 is allocated at sp+32
    404 ; Space for s2 is allocated at sp
    405 ; FAST: add x1, sp, #32
    406 ; FAST: mov x2, sp
    407 ; FAST: str {{x[0-9]+}}, [sp, #32]
    408 ; FAST: str {{x[0-9]+}}, [sp, #40]
    409 ; FAST: str {{x[0-9]+}}, [sp, #48]
    410 ; FAST: str {{x[0-9]+}}, [sp, #56]
    411 ; FAST: str {{x[0-9]+}}, [sp]
    412 ; FAST: str {{x[0-9]+}}, [sp, #8]
    413 ; FAST: str {{x[0-9]+}}, [sp, #16]
    414 ; FAST: str {{x[0-9]+}}, [sp, #24]
    415   %tmp = alloca %struct.s43, align 16
    416   %tmp1 = alloca %struct.s43, align 16
    417   %0 = bitcast %struct.s43* %tmp to i8*
    418   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    419   %1 = bitcast %struct.s43* %tmp1 to i8*
    420   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    421   %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
    422   ret i32 %call
    423 }
    424 
    425 declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
    426                        i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
    427                        %struct.s43* nocapture %s2) #2
    428 
        ; Copies @g43 and @g43_2 (32 bytes each) into two stack temporaries and
        ; calls @f43_stack with nine leading i32 arguments, so the ninth i32 and
        ; both struct pointers are expected in memory: 9 at [sp], the address of
        ; s1 at [sp, #8] and the address of s2 at [sp, #16].
    429 define i32 @caller43_stack() #3 {
    430 entry:
    431 ; CHECK-LABEL: caller43_stack
    432 ; CHECK: sub sp, sp, #112
    433 ; CHECK: add x29, sp, #96
    434 ; CHECK: stur {{q[0-9]+}}, [x29, #-16]
    435 ; CHECK: stur {{q[0-9]+}}, [x29, #-32]
    436 ; CHECK: str {{q[0-9]+}}, [sp, #48]
    437 ; CHECK: str {{q[0-9]+}}, [sp, #32]
    438 ; Space for s1 is allocated at x29-32 = sp+64
    439 ; Space for s2 is allocated at sp+32
    440 ; CHECK: add x[[B:[0-9]+]], sp, #32
    441 ; CHECK: str x[[B]], [sp, #16]
    442 ; CHECK: sub x[[A:[0-9]+]], x29, #32
    443 ; Address of s1 is passed on stack at sp+8
    444 ; CHECK: str x[[A]], [sp, #8]
    445 ; CHECK: mov w[[C:[0-9]+]], #9
    446 ; CHECK: str w[[C]], [sp]
    447 
    448 ; FAST-LABEL: caller43_stack
    449 ; FAST: sub sp, sp, #112
    450 ; Space for s1 is allocated at fp-32 = sp+64
    451 ; Space for s2 is allocated at sp+32
    452 ; FAST: sub x[[A:[0-9]+]], x29, #32
    453 ; FAST: add x[[B:[0-9]+]], sp, #32
    454 ; FAST: stur {{x[0-9]+}}, [x29, #-32]
    455 ; FAST: stur {{x[0-9]+}}, [x29, #-24]
    456 ; FAST: stur {{x[0-9]+}}, [x29, #-16]
    457 ; FAST: stur {{x[0-9]+}}, [x29, #-8]
    458 ; FAST: str {{x[0-9]+}}, [sp, #32]
    459 ; FAST: str {{x[0-9]+}}, [sp, #40]
    460 ; FAST: str {{x[0-9]+}}, [sp, #48]
    461 ; FAST: str {{x[0-9]+}}, [sp, #56]
    462 ; FAST: str {{w[0-9]+}}, [sp]
    463 ; Address of s1 is passed on stack at sp+8
    464 ; FAST: str {{x[0-9]+}}, [sp, #8]
    465 ; FAST: str {{x[0-9]+}}, [sp, #16]
    466   %tmp = alloca %struct.s43, align 16
    467   %tmp1 = alloca %struct.s43, align 16
    468   %0 = bitcast %struct.s43* %tmp to i8*
    469   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    470   %1 = bitcast %struct.s43* %tmp1 to i8*
    471   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
    472   %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    473                        i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
    474   ret i32 %call
    475 }
    476 
    477 ; rdar://13668927
    478 ; Check that we don't split an i128.
        ; Seven i32 arguments precede the i128, and one more i32 follows it.
        ; The expectations require the whole i128 to be placed on the stack at
        ; [sp] and the trailing i32 at [sp, #16].
    479 declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
    480                                i32 %i6, i32 %i7, i128 %s1, i32 %i8)
    481 
    482 define i32 @i128_split() {
    483 entry:
    484 ; CHECK-LABEL: i128_split
    485 ; "i128 %0" should be on stack at [sp].
    486 ; "i32 8" should be on stack at [sp, #16].
    487 ; CHECK: str {{w[0-9]+}}, [sp, #16]
    488 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
    489 ; FAST-LABEL: i128_split
    490 ; FAST: sub sp, sp
    491 ; FAST: mov x[[ADDR:[0-9]+]], sp
    492 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
    493 ; Load/Store opt is disabled with -O0, so the i128 is split.
    494 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
    495 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
        ; The i128 argument is loaded from @g41 (16-byte aligned).
    496   %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
    497   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
    498                                            i32 6, i32 7, i128 %0, i32 8) #5
    499   ret i32 %call
    500 }
    501 
    502 declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
    503                                i32 %i6, i32 %i7, i64 %s1, i32 %i8)
    504 
        ; Counterpart to @i128_split with an i64 in the eighth argument
        ; position: the i64 is expected in x7 and only the trailing i32 is
        ; expected on the stack at [sp].
    505 define i32 @i64_split() {
    506 entry:
    507 ; CHECK-LABEL: i64_split
    508 ; "i64 %0" should be in register x7.
    509 ; "i32 8" should be on stack at [sp].
    510 ; CHECK: ldr x7, [{{x[0-9]+}}]
    511 ; CHECK: str {{w[0-9]+}}, [sp]
    512 ; FAST-LABEL: i64_split
    513 ; FAST: ldr x7, [{{x[0-9]+}}]
    514 ; FAST: mov x[[R0:[0-9]+]], sp
    515 ; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
    516 ; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
        ; The i64 argument is loaded from the first 8 bytes of @g41.
    517   %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
    518   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
    519                                     i32 6, i32 7, i64 %0, i32 8) #5
    520   ret i32 %call
    521 }
    522 
    523 attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; f38-f41 and the f38_stack-f41_stack declarations
    524 attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; caller38-caller41 and their _stack variants
    525 attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; f42, f43, f42_stack, f43_stack
    526 attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } ; caller42, caller43 and their _stack variants
    527 attributes #4 = { nounwind } ; llvm.memcpy declaration
    528 attributes #5 = { nobuiltin } ; applied at the call sites to the f* / callee_* functions
    529 
    530 !0 = !{!"int", !1} ; TBAA node used by the i32 member loads
    531 !1 = !{!"omnipotent char", !2}
    532 !2 = !{!"Simple C/C++ TBAA"}
    533 !3 = !{!"short", !1} ; TBAA node used by the i16 member loads
    534 !4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3} ; tbaa.struct on the memcpy calls: (offset, size, type) triples for three int/short pairs
    535