Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
      2 
      3 // CHECK: define signext i8 @f0()
      4 char f0(void) {
      5   return 0;
      6 }
      7 
      8 // Struct as return type. Aggregates <= 16 bytes are passed directly and are
      9 // rounded up to a multiple of 8 bytes.
     10 // CHECK: define i64 @f1()
     11 struct s1 { char f0; };
     12 struct s1 f1(void) {}
     13 
     14 // CHECK: define i64 @f2()
     15 struct s2 { short f0; };
     16 struct s2 f2(void) {}
     17 
     18 // CHECK: define i64 @f3()
     19 struct s3 { int f0; };
     20 struct s3 f3(void) {}
     21 
     22 // CHECK: define i64 @f4()
     23 struct s4 { struct s4_0 { int f0; } f0; };
     24 struct s4 f4(void) {}
     25 
     26 // CHECK: define i64 @f5()
     27 struct s5 { struct { } f0; int f1; };
     28 struct s5 f5(void) {}
     29 
     30 // CHECK: define i64 @f6()
     31 struct s6 { int f0[1]; };
     32 struct s6 f6(void) {}
     33 
     34 // CHECK: define void @f7()
     35 struct s7 { struct { int : 0; } f0; };
     36 struct s7 f7(void) {}
     37 
     38 // CHECK: define void @f8()
     39 struct s8 { struct { int : 0; } f0[1]; };
     40 struct s8 f8(void) {}
     41 
     42 // CHECK: define i64 @f9()
     43 struct s9 { int f0; int : 0; };
     44 struct s9 f9(void) {}
     45 
     46 // CHECK: define i64 @f10()
     47 struct s10 { int f0; int : 0; int : 0; };
     48 struct s10 f10(void) {}
     49 
     50 // CHECK: define i64 @f11()
     51 struct s11 { int : 0; int f0; };
     52 struct s11 f11(void) {}
     53 
     54 // CHECK: define i64 @f12()
     55 union u12 { char f0; short f1; int f2; };
     56 union u12 f12(void) {}
     57 
     58 // Homogeneous Aggregate as return type will be passed directly.
     59 // CHECK: define %struct.s13 @f13()
     60 struct s13 { float f0; };
     61 struct s13 f13(void) {}
     62 // CHECK: define %union.u14 @f14()
     63 union u14 { float f0; };
     64 union u14 f14(void) {}
     65 
     66 // CHECK: define void @f15()
     67 void f15(struct s7 a0) {}
     68 
     69 // CHECK: define void @f16()
     70 void f16(struct s8 a0) {}
     71 
     72 // CHECK: define i64 @f17()
     73 struct s17 { short f0 : 13; char f1 : 4; };
     74 struct s17 f17(void) {}
     75 
     76 // CHECK: define i64 @f18()
     77 struct s18 { short f0; char f1 : 4; };
     78 struct s18 f18(void) {}
     79 
     80 // CHECK: define i64 @f19()
     81 struct s19 { int f0; struct s8 f1; };
     82 struct s19 f19(void) {}
     83 
     84 // CHECK: define i64 @f20()
     85 struct s20 { struct s8 f1; int f0; };
     86 struct s20 f20(void) {}
     87 
     88 // CHECK: define i64 @f21()
     89 struct s21 { struct {} f1; int f0 : 4; };
     90 struct s21 f21(void) {}
     91 
     92 // CHECK: define i64 @f22()
     93 // CHECK: define i64 @f23()
     94 // CHECK: define i64 @f24()
     95 // CHECK: define i128 @f25()
     96 // CHECK: define { float, float } @f26()
     97 // CHECK: define { double, double } @f27()
     98 _Complex char       f22(void) {}
     99 _Complex short      f23(void) {}
    100 _Complex int        f24(void) {}
    101 _Complex long long  f25(void) {}
    102 _Complex float      f26(void) {}
    103 _Complex double     f27(void) {}
    104 
    105 // CHECK: define i64 @f28()
    106 struct s28 { _Complex char f0; };
    107 struct s28 f28() {}
    108 
    109 // CHECK: define i64 @f29()
    110 struct s29 { _Complex short f0; };
    111 struct s29 f29() {}
    112 
    113 // CHECK: define i64 @f30()
    114 struct s30 { _Complex int f0; };
    115 struct s30 f30() {}
    116 
    117 struct s31 { char x; };
    118 void f31(struct s31 s) { }
    119 // CHECK: define void @f31(i64 %s.coerce)
    120 // CHECK: %s = alloca %struct.s31, align 8
    121 // CHECK: trunc i64 %s.coerce to i8
    122 // CHECK: store i8 %{{.*}},
    123 
    124 struct s32 { double x; };
    125 void f32(struct s32 s) { }
    126 // Expand Homogeneous Aggregate.
    127 // CHECK: @f32(double %{{.*}})
    128 
    129 // A composite type larger than 16 bytes should be passed indirectly.
    130 struct s33 { char buf[32*32]; };
    131 void f33(struct s33 s) { }
    132 // CHECK: define void @f33(%struct.s33* %s)
    133 
    134 struct s34 { char c; };
    135 void f34(struct s34 s);
    136 void g34(struct s34 *s) { f34(*s); }
    137 // CHECK: @g34(%struct.s34* %s)
    138 // CHECK: %[[a:.*]] = load i8* %{{.*}}
    139 // CHECK: zext i8 %[[a]] to i64
    140 // CHECK: call void @f34(i64 %{{.*}})
    141 
    142 /*
    143  * Check that va_arg accesses stack according to ABI alignment
    144  */
    145 long long t1(int i, ...) {
    146     // CHECK: t1
    147     __builtin_va_list ap;
    148     __builtin_va_start(ap, i);
    149     // CHECK-NOT: add i32 %{{.*}} 7
    150     // CHECK-NOT: and i32 %{{.*}} -8
    151     long long ll = __builtin_va_arg(ap, long long);
    152     __builtin_va_end(ap);
    153     return ll;
    154 }
    155 double t2(int i, ...) {
    156     // CHECK: t2
    157     __builtin_va_list ap;
    158     __builtin_va_start(ap, i);
    159     // CHECK-NOT: add i32 %{{.*}} 7
    160     // CHECK-NOT: and i32 %{{.*}} -8
    161     double ll = __builtin_va_arg(ap, double);
    162     __builtin_va_end(ap);
    163     return ll;
    164 }
    165 
    166 #include <arm_neon.h>
    167 
    168 // Homogeneous Vector Aggregate as return type and argument type.
    169 // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
    170 int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) {
    171   return vzipq_s8(a0, a1);
    172 }
    173 
    174 // Test direct vector passing.
    175 typedef float T_float32x2 __attribute__ ((__vector_size__ (8)));
    176 typedef float T_float32x4 __attribute__ ((__vector_size__ (16)));
    177 typedef float T_float32x8 __attribute__ ((__vector_size__ (32)));
    178 typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));
    179 
    180 // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}})
    181 T_float32x2 f1_0(T_float32x2 a0) { return a0; }
    182 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
    183 T_float32x4 f1_1(T_float32x4 a0) { return a0; }
    184 // Vectors larger than 16 bytes are illegal and are passed indirectly.
    185 // CHECK: define void @f1_2(<8 x float>* noalias sret  %{{.*}}, <8 x float>*)
    186 T_float32x8 f1_2(T_float32x8 a0) { return a0; }
    187 // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*)
    188 T_float32x16 f1_3(T_float32x16 a0) { return a0; }
    189 
    190 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
    191 // aggregates with size > 16 bytes.
    192 struct s35
    193 {
    194    float v[4]; //Testing HFA.
    195 } __attribute__((aligned(16)));
    196 typedef struct s35 s35_with_align;
    197 
    198 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
    199 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
    200 // CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
    201 // CHECK: %s1 = alloca %struct.s35, align 16
    202 // CHECK: %s2 = alloca %struct.s35, align 16
    203 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
    204 // CHECK: load <4 x float>* %[[a]], align 16
    205 // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
    206 // CHECK: load <4 x float>* %[[b]], align 16
    207   float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
    208                             *(float32x4_t *)&s2);
    209   return v;
    210 }
    211 
    212 struct s36
    213 {
    214    int v[4]; //Testing 16-byte aggregate.
    215 } __attribute__((aligned(16)));
    216 typedef struct s36 s36_with_align;
    217 
    218 typedef __attribute__((neon_vector_type(4))) int int32x4_t;
    219 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
    220 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
    221 // CHECK: %s1 = alloca %struct.s36, align 16
    222 // CHECK: %s2 = alloca %struct.s36, align 16
    223 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
    224 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
    225 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
    226 // CHECK: load <4 x i32>* %[[a]], align 16
    227 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
    228 // CHECK: load <4 x i32>* %[[b]], align 16
    229   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
    230                           *(int32x4_t *)&s2);
    231   return v;
    232 }
    233 
    234 struct s37
    235 {
    236    int v[18]; //Testing large aggregate.
    237 } __attribute__((aligned(16)));
    238 typedef struct s37 s37_with_align;
    239 
    240 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
    241 // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2)
    242 // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>*
    243 // CHECK: load <4 x i32>* %[[a]], align 16
    244 // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>*
    245 // CHECK: load <4 x i32>* %[[b]], align 16
    246   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
    247                           *(int32x4_t *)&s2);
    248   return v;
    249 }
    250 s37_with_align g37;
    251 int32x4_t caller37() {
    252 // CHECK: caller37
    253 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16
    254 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16
    255 // CHECK: call void @llvm.memcpy
    256 // CHECK: call void @llvm.memcpy
    257 // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]])
    258   return f37(3, g37, g37);
    259 }
    260 
    261 // rdar://problem/12648441
    262 // Test passing structs with size <= 8, <= 16 and > 16 bytes,
    263 // with and without an alignment of 16.
    264 
    265 // structs with size <= 8 bytes, without alignment attribute:
    266 // passed as i64 (the 16-byte-aligned variant below is passed as i128)
    267 struct s38
    268 {
    269   int i;
    270   short s;
    271 };
    272 typedef struct s38 s38_no_align;
    273 // passing structs in registers
    274 __attribute__ ((noinline))
    275 int f38(int i, s38_no_align s1, s38_no_align s2) {
    276 // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
    277 // CHECK: %s1 = alloca %struct.s38, align 8
    278 // CHECK: %s2 = alloca %struct.s38, align 8
    279 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
    280 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
    281 // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
    282 // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
    283 // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
    284 // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
    285   return s1.i + s2.i + i + s1.s + s2.s;
    286 }
    287 s38_no_align g38;
    288 s38_no_align g38_2;
    289 int caller38() {
    290 // CHECK: define i32 @caller38()
    291 // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
    292 // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
    293 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
    294   return f38(3, g38, g38_2);
    295 }
    296 // passing structs on stack
    297 __attribute__ ((noinline))
    298 int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    299               int i9, s38_no_align s1, s38_no_align s2) {
    300 // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
    301 // CHECK: %s1 = alloca %struct.s38, align 8
    302 // CHECK: %s2 = alloca %struct.s38, align 8
    303 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
    304 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
    305 // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
    306 // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
    307 // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
    308 // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
    309   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    310 }
    311 int caller38_stack() {
    312 // CHECK: define i32 @caller38_stack()
    313 // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
    314 // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
    315 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
    316   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
    317 }
    318 
    319 // structs with size <= 8 bytes, with alignment attribute
    320 struct s39
    321 {
    322   int i;
    323   short s;
    324 } __attribute__((aligned(16)));
    325 typedef struct s39 s39_with_align;
    326 // passing aligned structs in registers
    327 __attribute__ ((noinline))
    328 int f39(int i, s39_with_align s1, s39_with_align s2) {
    329 // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
    330 // CHECK: %s1 = alloca %struct.s39, align 16
    331 // CHECK: %s2 = alloca %struct.s39, align 16
    332 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
    333 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
    334 // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
    335 // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
    336 // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
    337 // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
    338   return s1.i + s2.i + i + s1.s + s2.s;
    339 }
    340 s39_with_align g39;
    341 s39_with_align g39_2;
    342 int caller39() {
    343 // CHECK: define i32 @caller39()
    344 // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
    345 // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
    346 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
    347   return f39(3, g39, g39_2);
    348 }
    349 // passing aligned structs on stack
    350 __attribute__ ((noinline))
    351 int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    352               int i9, s39_with_align s1, s39_with_align s2) {
    353 // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
    354 // CHECK: %s1 = alloca %struct.s39, align 16
    355 // CHECK: %s2 = alloca %struct.s39, align 16
    356 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
    357 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
    358 // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
    359 // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
    360 // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
    361 // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
    362   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    363 }
    364 int caller39_stack() {
    365 // CHECK: define i32 @caller39_stack()
    366 // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
    367 // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
    368 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
    369   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
    370 }
    371 
    372 // structs with size <= 16 bytes, without alignment attribute
    373 struct s40
    374 {
    375   int i;
    376   short s;
    377   int i2;
    378   short s2;
    379 };
    380 typedef struct s40 s40_no_align;
    381 // passing structs in registers
    382 __attribute__ ((noinline))
    383 int f40(int i, s40_no_align s1, s40_no_align s2) {
    384 // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
    385 // CHECK: %s1 = alloca %struct.s40, align 8
    386 // CHECK: %s2 = alloca %struct.s40, align 8
    387 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
    388 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
    389 // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
    390 // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
    391 // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
    392 // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
    393   return s1.i + s2.i + i + s1.s + s2.s;
    394 }
    395 s40_no_align g40;
    396 s40_no_align g40_2;
    397 int caller40() {
    398 // CHECK: define i32 @caller40()
    399 // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
    400 // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
    401 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
    402   return f40(3, g40, g40_2);
    403 }
    404 // passing structs on stack
    405 __attribute__ ((noinline))
    406 int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    407               int i9, s40_no_align s1, s40_no_align s2) {
    408 // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
    409 // CHECK: %s1 = alloca %struct.s40, align 8
    410 // CHECK: %s2 = alloca %struct.s40, align 8
    411 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
    412 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
    413 // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
    414 // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
    415 // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
    416 // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
    417   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    418 }
    419 int caller40_stack() {
    420 // CHECK: define i32 @caller40_stack()
    421 // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
    422 // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
    423 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
    424   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
    425 }
    426 
    427 // structs with size <= 16 bytes, with alignment attribute
    428 struct s41
    429 {
    430   int i;
    431   short s;
    432   int i2;
    433   short s2;
    434 } __attribute__((aligned(16)));
    435 typedef struct s41 s41_with_align;
    436 // passing aligned structs in registers
    437 __attribute__ ((noinline))
    438 int f41(int i, s41_with_align s1, s41_with_align s2) {
    439 // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
    440 // CHECK: %s1 = alloca %struct.s41, align 16
    441 // CHECK: %s2 = alloca %struct.s41, align 16
    442 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
    443 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
    444 // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
    445 // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
    446 // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
    447 // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
    448   return s1.i + s2.i + i + s1.s + s2.s;
    449 }
    450 s41_with_align g41;
    451 s41_with_align g41_2;
    452 int caller41() {
    453 // CHECK: define i32 @caller41()
    454 // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
    455 // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
    456 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
    457   return f41(3, g41, g41_2);
    458 }
    459 // passing aligned structs on stack
    460 __attribute__ ((noinline))
    461 int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    462               int i9, s41_with_align s1, s41_with_align s2) {
    463 // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
    464 // CHECK: %s1 = alloca %struct.s41, align 16
    465 // CHECK: %s2 = alloca %struct.s41, align 16
    466 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
    467 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
    468 // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
    469 // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
    470 // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
    471 // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
    472   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    473 }
    474 int caller41_stack() {
    475 // CHECK: define i32 @caller41_stack()
    476 // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
    477 // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
    478 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
    479   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
    480 }
    481 
    482 // structs with size > 16 bytes, without alignment attribute
    483 struct s42
    484 {
    485   int i;
    486   short s;
    487   int i2;
    488   short s2;
    489   int i3;
    490   short s3;
    491 };
    492 typedef struct s42 s42_no_align;
    493 // passing structs in registers
    494 __attribute__ ((noinline))
    495 int f42(int i, s42_no_align s1, s42_no_align s2) {
    496 // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
    497 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
    498 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
    499 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
    500 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
    501   return s1.i + s2.i + i + s1.s + s2.s;
    502 }
    503 s42_no_align g42;
    504 s42_no_align g42_2;
    505 int caller42() {
    506 // CHECK: define i32 @caller42()
    507 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
    508 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
    509 // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
    510 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    511 // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
    512 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    513 // CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
    514   return f42(3, g42, g42_2);
    515 }
    516 // passing structs on stack
    517 __attribute__ ((noinline))
    518 int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    519               int i9, s42_no_align s1, s42_no_align s2) {
    520 // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
    521 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
    522 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
    523 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
    524 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
    525   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    526 }
    527 int caller42_stack() {
    528 // CHECK: define i32 @caller42_stack()
    529 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
    530 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
    531 // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
    532 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    533 // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
    534 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    535 // CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]])
    536   return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
    537 }
    538 
    539 // structs with size > 16 bytes, with alignment attribute
    540 struct s43
    541 {
    542   int i;
    543   short s;
    544   int i2;
    545   short s2;
    546   int i3;
    547   short s3;
    548 } __attribute__((aligned(16)));
    549 typedef struct s43 s43_with_align;
    550 // passing aligned structs in registers
    551 __attribute__ ((noinline))
    552 int f43(int i, s43_with_align s1, s43_with_align s2) {
    553 // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2)
    554 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
    555 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
    556 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
    557 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
    558   return s1.i + s2.i + i + s1.s + s2.s;
    559 }
    560 s43_with_align g43;
    561 s43_with_align g43_2;
    562 int caller43() {
    563 // CHECK: define i32 @caller43()
    564 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
    565 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
    566 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
    567 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    568 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
    569 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    570 // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]])
    571   return f43(3, g43, g43_2);
    572 }
    573 // passing aligned structs on stack
    574 __attribute__ ((noinline))
    575 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
    576               int i9, s43_with_align s1, s43_with_align s2) {
    577 // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2)
    578 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
    579 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
    580 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
    581 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
    582   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
    583 }
    584 int caller43_stack() {
    585 // CHECK: define i32 @caller43_stack()
    586 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16
    587 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16
    588 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
    589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    590 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
    591 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
    592 // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]])
    593   return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2);
    594 }
    595 
    596 // rdar://13668927
    597 // We should not split argument s1 between registers and stack.
    598 __attribute__ ((noinline))
    599 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
    600               s40_no_align s1, s40_no_align s2) {
    601 // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
    602   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
    603 }
    604 int caller40_split() {
        // The call site must carry the same [1 x i32] padding argument as the
        // callee's signature, followed by both structs passed whole as
        // [2 x i64]; the two struct operands are matched as separate,
        // comma-delimited arguments.
    605 // CHECK: define i32 @caller40_split()
    606 // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}}, [2 x i64] %{{.*}})
    607   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
    608 }
    609 
    610 __attribute__ ((noinline))
    611 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
    612               s41_with_align s1, s41_with_align s2) {
    613 // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
    614   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
    615 }
    616 int caller41_split() {
    617 // CHECK: define i32 @caller41_split()
    618 // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
    619   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
    620 }
    621 
    622 // Handle homogeneous aggregates properly in variadic functions.
    623 struct HFA {
    624   float a, b, c, d;
    625 };
    626 
    627 float test_hfa(int n, ...) {
    628 // CHECK-LABEL: define float @test_hfa(i32 %n, ...)
    629 // CHECK: [[THELIST:%.*]] = alloca i8*
    630 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
    631 
    632   // HFA is not indirect, so occupies its full 16 bytes on the stack.
    633 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 16
    634 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
    635 
    636 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
    637   __builtin_va_list thelist;
    638   __builtin_va_start(thelist, n);
    639   struct HFA h = __builtin_va_arg(thelist, struct HFA);
    640   return h.d;
    641 }
    642 
    643 float test_hfa_call(struct HFA *a) {
        // Passes *a by value to the variadic test_hfa; the directive below
        // expects the HFA argument to be coerced to [2 x double] at the call.
    644 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
    645 // CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
        // Return the callee's result so control never falls off the end of a
        // function declared to return float.
    646   return test_hfa(1, *a);
    647 }
    648 
    649 struct TooBigHFA {
    650   float a, b, c, d, e;
    651 };
    652 
    653 float test_toobig_hfa(int n, ...) {
    654 // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...)
    655 // CHECK: [[THELIST:%.*]] = alloca i8*
    656 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
    657 
    658   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
    659   // of stack consumed.
    660 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
    661 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
    662 
    663 // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
    664 // CHECK: [[HFAPTR:%.*]] = load i8** [[HFAPTRPTR]]
    665 // CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA*
    666   __builtin_va_list thelist;
    667   __builtin_va_start(thelist, n);
    668   struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
    669   return h.d;
    670 }
    671 
    672 struct HVA {
    673   int32x4_t a, b;
    674 };
    675 
    676 int32x4_t test_hva(int n, ...) {
    677 // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...)
    678 // CHECK: [[THELIST:%.*]] = alloca i8*
    679 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
    680 
    681   // HVA is not indirect, so occupies its full 32 bytes on the stack, but it
    682   // must be properly aligned (rounded up to 16 bytes) first.
    683 // CHECK: [[ALIGN0:%.*]] = getelementptr i8* [[CURLIST]], i32 15
    684 // CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64
    685 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
    686 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
    687 
    688 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[ALIGNED_LIST]], i32 32
    689 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
    690 
    691 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
    692   __builtin_va_list thelist;
    693   __builtin_va_start(thelist, n);
    694   struct HVA h = __builtin_va_arg(thelist, struct HVA);
    695   return h.b;
    696 }
    697 
    698 struct TooBigHVA {
    699   int32x4_t a, b, c, d, e;
    700 };
    701 
    702 int32x4_t test_toobig_hva(int n, ...) {
    703 // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...)
    704 // CHECK: [[THELIST:%.*]] = alloca i8*
    705 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
    706 
    707   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
    708   // of stack consumed.
    709 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
    710 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
    711 
    712 // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
    713 // CHECK: [[HVAPTR:%.*]] = load i8** [[HVAPTRPTR]]
    714 // CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA*
    715   __builtin_va_list thelist;
    716   __builtin_va_start(thelist, n);
    717   struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
    718   return h.d;
    719 }
    720