// Verifies the LLVM IR that clang emits for argument passing and return values under the arm64 darwinpcs ABI.
      1 // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
      2 
      3 // CHECK: define signext i8 @f0()
      4 char f0(void) {
      5   return 0;
      6 }
      7 
// Struct as return type. Aggregates <= 16 bytes are passed directly and round
// up to multiple of 8 bytes.
// The function bodies in this group are empty; each directive below only
// matches the emitted function signature.

// Struct holding a single char.
// CHECK: define i64 @f1()
struct s1 { char f0; };
struct s1 f1(void) {}

// Struct holding a single short.
// CHECK: define i64 @f2()
struct s2 { short f0; };
struct s2 f2(void) {}

// Struct holding a single int.
// CHECK: define i64 @f3()
struct s3 { int f0; };
struct s3 f3(void) {}

// Struct wrapping another struct that holds a single int.
// CHECK: define i64 @f4()
struct s4 { struct s4_0 { int f0; } f0; };
struct s4 f4(void) {}

// Empty struct member followed by an int.
// CHECK: define i64 @f5()
struct s5 { struct { } f0; int f1; };
struct s5 f5(void) {}

// One-element int array.
// CHECK: define i64 @f6()
struct s6 { int f0[1]; };
struct s6 f6(void) {}
     33 
// A struct whose only content is a zero-width bit-field is expected to be
// returned as void.
// CHECK: define void @f7()
struct s7 { struct { int : 0; } f0; };
struct s7 f7(void) {}

// Same as s7, but the zero-width bit-field struct sits in a one-element array.
// CHECK: define void @f8()
struct s8 { struct { int : 0; } f0[1]; };
struct s8 f8(void) {}
     41 
// An int combined with zero-width bit-fields (after or before it), and a
// small union, are all expected to be returned as i64.

// int followed by one zero-width bit-field.
// CHECK: define i64 @f9()
struct s9 { int f0; int : 0; };
struct s9 f9(void) {}

// int followed by two zero-width bit-fields.
// CHECK: define i64 @f10()
struct s10 { int f0; int : 0; int : 0; };
struct s10 f10(void) {}

// Zero-width bit-field followed by an int.
// CHECK: define i64 @f11()
struct s11 { int : 0; int f0; };
struct s11 f11(void) {}

// Union of char, short and int.
// CHECK: define i64 @f12()
union u12 { char f0; short f1; int f2; };
union u12 f12(void) {}
     57 
// Homogeneous Aggregate as return type will be passed directly.
// A single-float struct and a single-float union are expected to keep their
// own IR type in the signature instead of being coerced to an integer.
// CHECK: define %struct.s13 @f13()
struct s13 { float f0; };
struct s13 f13(void) {}
// CHECK: define %union.u14 @f14()
union u14 { float f0; };
union u14 f14(void) {}
     65 
// Arguments of the empty struct types s7 and s8 are expected to vanish from
// the emitted signature.
// CHECK: define void @f15()
void f15(struct s7 a0) {}

// CHECK: define void @f16()
void f16(struct s8 a0) {}
     71 
// Small structs that mix bit-fields, empty members and ordinary fields; every
// one of them is expected to be returned as i64.

// Two bit-fields: 13 bits of short and 4 bits of char.
// CHECK: define i64 @f17()
struct s17 { short f0 : 13; char f1 : 4; };
struct s17 f17(void) {}

// A short followed by a 4-bit char bit-field.
// CHECK: define i64 @f18()
struct s18 { short f0; char f1 : 4; };
struct s18 f18(void) {}

// An int followed by the empty struct s8.
// CHECK: define i64 @f19()
struct s19 { int f0; struct s8 f1; };
struct s19 f19(void) {}

// The empty struct s8 followed by an int.
// CHECK: define i64 @f20()
struct s20 { struct s8 f1; int f0; };
struct s20 f20(void) {}

// An empty struct followed by a 4-bit int bit-field.
// CHECK: define i64 @f21()
struct s21 { struct {} f1; int f0 : 4; };
struct s21 f21(void) {}
     91 
// _Complex return values. The expectations below say that the char, short and
// int variants come back as i64, long long as [2 x i64], and float/double as a
// pair of scalars of the element type.
// CHECK: define i64 @f22()
// CHECK: define i64 @f23()
// CHECK: define i64 @f24()
// CHECK: define [2 x i64] @f25()
// CHECK: define { float, float } @f26()
// CHECK: define { double, double } @f27()
_Complex char       f22(void) {}
_Complex short      f23(void) {}
_Complex int        f24(void) {}
_Complex long long  f25(void) {}
_Complex float      f26(void) {}
_Complex double     f27(void) {}
    104 
// Structs wrapping a single integer _Complex member; each is expected to be
// returned as i64.
// CHECK: define i64 @f28()
struct s28 { _Complex char f0; };
struct s28 f28() {}

// CHECK: define i64 @f29()
struct s29 { _Complex short f0; };
struct s29 f29() {}

// CHECK: define i64 @f30()
struct s30 { _Complex int f0; };
struct s30 f30() {}
    116 
// One-byte struct as an argument: expected to arrive coerced to i64, then be
// truncated to i8 and stored into the local copy.
struct s31 { char x; };
void f31(struct s31 s) { }
// CHECK: define void @f31(i64 %s.coerce)
// CHECK: %s = alloca %struct.s31, align 1
// CHECK: trunc i64 %s.coerce to i8
// CHECK: store i8 %{{.*}},
    123 
// Single-double struct as an argument: expected to be passed as [1 x double].
struct s32 { double x; };
void f32(struct s32 s) { }
// CHECK: @f32([1 x double] %{{.*}})
    127 
// A composite type larger than 16 bytes should be passed indirectly.
// Here the struct holds a 32*32-byte buffer and is expected to arrive as a
// pointer to the struct.
struct s33 { char buf[32*32]; };
void f33(struct s33 s) { }
// CHECK: define void @f33(%struct.s33* %s)
    132 
// Call-site side of passing a one-byte struct: the caller is expected to load
// the i8, zero-extend it to i64 and pass that value to f34.
struct s34 { char c; };
void f34(struct s34 s);
void g34(struct s34 *s) { f34(*s); }
// CHECK: @g34(%struct.s34* %s)
// CHECK: %[[a:.*]] = load i8, i8* %{{.*}}
// CHECK: zext i8 %[[a]] to i64
// CHECK: call void @f34(i64 %{{.*}})
    140 
/*
 * Check that va_arg accesses stack according to ABI alignment
 */
// Reads one long long from the variadic list and returns it. The negative
// patterns require that no round-up-to-8 sequence (add 7 / and -8) is emitted
// between va_start and the end of the function.
// NOTE(review): both negative patterns are written for i32 arithmetic; on this
// 64-bit target pointer arithmetic is presumably i64, so confirm they still
// guard what was intended.
long long t1(int i, ...) {
    // CHECK: t1
    __builtin_va_list ap;
    __builtin_va_start(ap, i);
    // CHECK-NOT: add i32 %{{.*}} 7
    // CHECK-NOT: and i32 %{{.*}} -8
    long long ll = __builtin_va_arg(ap, long long);
    __builtin_va_end(ap);
    return ll;
}
// Same as t1, but the value pulled from the variadic list is a double.
// NOTE(review): the negative patterns are written for i32 arithmetic; confirm
// they are still meaningful on this 64-bit target.
double t2(int i, ...) {
    // CHECK: t2
    __builtin_va_list ap;
    __builtin_va_start(ap, i);
    // CHECK-NOT: add i32 %{{.*}} 7
    // CHECK-NOT: and i32 %{{.*}} -8
    double ll = __builtin_va_arg(ap, double);
    __builtin_va_end(ap);
    return ll;
}
    164 
    165 #include <arm_neon.h>
    166 
// Homogeneous Vector Aggregate as return type and argument type.
// The two <16 x i8> arguments are expected to be passed directly and the
// int8x16x2_t result returned as its struct type.
// CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) {
  return vzipq_s8(a0, a1);
}
    172 
// Test direct vector passing.
// Generic float vectors of 8, 16, 32 and 64 bytes.
typedef float T_float32x2 __attribute__ ((__vector_size__ (8)));
typedef float T_float32x4 __attribute__ ((__vector_size__ (16)));
typedef float T_float32x8 __attribute__ ((__vector_size__ (32)));
typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));

// 8- and 16-byte vectors are expected to be passed and returned by value.
// CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}})
T_float32x2 f1_0(T_float32x2 a0) { return a0; }
// CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
T_float32x4 f1_1(T_float32x4 a0) { return a0; }
// Vector with length bigger than 16-byte is illegal and is passed indirectly.
// The result is expected to come back through an sret pointer and the argument
// to arrive as a pointer.
// CHECK: define void @f1_2(<8 x float>* noalias sret  %{{.*}}, <8 x float>*)
T_float32x8 f1_2(T_float32x8 a0) { return a0; }
// CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*)
T_float32x16 f1_3(T_float32x16 a0) { return a0; }
    188 
// Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
// aggregates with size > 16 bytes.
struct s35
{
   float v[4]; //Testing HFA.
} __attribute__((aligned(16)));
typedef struct s35 s35_with_align;

typedef __attribute__((neon_vector_type(4))) float float32x4_t;
// Adds the two 16-byte-aligned HFA arguments as float vectors. Each argument
// is expected to arrive as [4 x float], be spilled to a 16-byte-aligned
// alloca, and be reloaded as <4 x float> with align 16.
float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
// CHECK: %s1 = alloca %struct.s35, align 16
// CHECK: %s2 = alloca %struct.s35, align 16
// CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
// CHECK: load <4 x float>, <4 x float>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
// CHECK: load <4 x float>, <4 x float>* %[[b]], align 16
  float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
                            *(float32x4_t *)&s2);
  return v;
}
    210 
// 16-byte-aligned struct of four ints (not a floating-point aggregate).
struct s36
{
   int v[4]; //Testing 16-byte aggregate.
} __attribute__((aligned(16)));
typedef struct s36 s36_with_align;

typedef __attribute__((neon_vector_type(4))) int int32x4_t;
// Adds the two struct arguments as int vectors. Each argument is expected to
// arrive as an i128, be stored to a 16-byte-aligned alloca, and be reloaded as
// <4 x i32> with align 16.
int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
// CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s36, align 16
// CHECK: %s2 = alloca %struct.s36, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
// CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
// CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
  int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                          *(int32x4_t *)&s2);
  return v;
}
    232 
// 16-byte-aligned struct of eighteen ints.
struct s37
{
   int v[18]; //Testing large aggregate.
} __attribute__((aligned(16)));
typedef struct s37 s37_with_align;

// Adds the leading four ints of both arguments as vectors. The struct
// arguments are expected to arrive as pointers, and the vector loads through
// them to use align 16.
int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
// CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2)
// CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>*
// CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>*
// CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
  int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                          *(int32x4_t *)&s2);
  return v;
}
// Call-site side of f37: the caller is expected to create two 16-byte-aligned
// temporaries, memcpy the global into each, and pass their addresses.
s37_with_align g37;
int32x4_t caller37() {
// CHECK: caller37
// CHECK: %[[a:.*]] = alloca %struct.s37, align 16
// CHECK: %[[b:.*]] = alloca %struct.s37, align 16
// CHECK: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy
// CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]])
  return f37(3, g37, g37);
}
    259 
// rdar://problem/12648441
// Test passing structs with size <= 8, <= 16 and > 16 bytes,
// with alignment of 16 and without.

// structs with size <= 8 bytes, without alignment attribute:
// passed as a single i64
// An int plus a short, with no alignment attribute.
struct s38
{
  int i;
  short s;
};
typedef struct s38 s38_no_align;
// passing structs in registers
// Callee side: each struct is expected to arrive as an i64, be stored to a
// 4-byte-aligned alloca, and have its fields read in the order s1.i, s2.i,
// s1.s, s2.s.
__attribute__ ((noinline))
int f38(int i, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 4
// CHECK: %s2 = alloca %struct.s38, align 4
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s38_no_align g38;
s38_no_align g38_2;
// Caller side: both globals are expected to be loaded as i64 (align 4) and
// passed by value.
int caller38() {
// CHECK: define i32 @caller38()
// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
  return f38(3, g38, g38_2);
}
// passing structs on stack
// Nine int arguments precede the two structs; the IR-level coercion of the
// structs is expected to be the same i64 form as in f38.
__attribute__ ((noinline))
int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 4
// CHECK: %s2 = alloca %struct.s38, align 4
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f38_stack: the globals are loaded as i64 and passed after the
// nine integer constants.
int caller38_stack() {
// CHECK: define i32 @caller38_stack()
// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
  return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
}
    317 
// structs with size <= 8 bytes, with alignment attribute
// Same members as s38, but the type is declared aligned(16).
struct s39
{
  int i;
  short s;
} __attribute__((aligned(16)));
typedef struct s39 s39_with_align;
// passing aligned structs in registers
// Callee side: each struct is expected to arrive as an i128 and be stored to
// a 16-byte-aligned alloca before its fields are read.
__attribute__ ((noinline))
int f39(int i, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s39_with_align g39;
s39_with_align g39_2;
// Caller side: both globals are expected to be loaded as i128 (align 16) and
// passed by value.
int caller39() {
// CHECK: define i32 @caller39()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
  return f39(3, g39, g39_2);
}
// passing aligned structs on stack
// Nine int arguments precede the two structs; the IR-level coercion of the
// structs is expected to be the same i128 form as in f39.
__attribute__ ((noinline))
int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f39_stack: the globals are loaded as i128 and passed after
// the nine integer constants.
int caller39_stack() {
// CHECK: define i32 @caller39_stack()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
}
    370 
// structs with size <= 16 bytes, without alignment attribute
// Two int/short pairs.
struct s40
{
  int i;
  short s;
  int i2;
  short s2;
};
typedef struct s40 s40_no_align;
// passing structs in registers
// Callee side: each struct is expected to arrive as [2 x i64] and be stored to
// a 4-byte-aligned alloca before its fields are read.
__attribute__ ((noinline))
int f40(int i, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 4
// CHECK: %s2 = alloca %struct.s40, align 4
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s40_no_align g40;
s40_no_align g40_2;
// Caller side: both globals are expected to be loaded as [2 x i64] (align 4)
// and passed by value.
int caller40() {
// CHECK: define i32 @caller40()
// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40(3, g40, g40_2);
}
// passing structs on stack
// Nine int arguments precede the two structs; the IR-level coercion of the
// structs is expected to be the same [2 x i64] form as in f40.
__attribute__ ((noinline))
int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 4
// CHECK: %s2 = alloca %struct.s40, align 4
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f40_stack: the globals are loaded as [2 x i64] and passed
// after the nine integer constants.
int caller40_stack() {
// CHECK: define i32 @caller40_stack()
// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
}
    425 
// structs with size <= 16 bytes, with alignment attribute
// Same members as s40, but the type is declared aligned(16).
struct s41
{
  int i;
  short s;
  int i2;
  short s2;
} __attribute__((aligned(16)));
typedef struct s41 s41_with_align;
// passing aligned structs in registers
// Callee side: each struct is expected to arrive as an i128 and be stored to
// a 16-byte-aligned alloca before its fields are read.
__attribute__ ((noinline))
int f41(int i, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s41_with_align g41;
s41_with_align g41_2;
// Caller side: both globals are expected to be loaded as i128 (align 16) and
// passed by value.
int caller41() {
// CHECK: define i32 @caller41()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
  return f41(3, g41, g41_2);
}
// passing aligned structs on stack
// Nine int arguments precede the two structs; the IR-level coercion of the
// structs is expected to be the same i128 form as in f41.
__attribute__ ((noinline))
int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f41_stack: the globals are loaded as i128 and passed after
// the nine integer constants.
int caller41_stack() {
// CHECK: define i32 @caller41_stack()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
}
    480 
// structs with size > 16 bytes, without alignment attribute
// Three int/short pairs.
struct s42
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
};
typedef struct s42 s42_no_align;
// passing structs in registers
// Callee side: each struct is expected to arrive as a pointer, and the fields
// are read directly through it.
__attribute__ ((noinline))
int f42(int i, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s42_no_align g42;
s42_no_align g42_2;
// Caller side: two 4-byte-aligned temporaries are expected, each filled by a
// memcpy, with their addresses passed to f42.
int caller42() {
// CHECK: define i32 @caller42()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42(3, g42, g42_2);
}
// passing structs on stack
// Nine int arguments precede the two structs; the structs are still expected
// to arrive as pointers, as in f42.
__attribute__ ((noinline))
int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f42_stack: two temporaries are filled by memcpy and their
// addresses are passed after the nine integer constants.
int caller42_stack() {
// CHECK: define i32 @caller42_stack()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
}
    537 
// structs with size > 16 bytes, with alignment attribute
// Same members as s42, but the type is declared aligned(16).
struct s43
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
} __attribute__((aligned(16)));
typedef struct s43 s43_with_align;
// passing aligned structs in registers
// Callee side: each struct is expected to arrive as a pointer, and the fields
// are read directly through it.
__attribute__ ((noinline))
int f43(int i, s43_with_align s1, s43_with_align s2) {
// CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2)
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s43_with_align g43;
s43_with_align g43_2;
// Caller side: two 16-byte-aligned temporaries are expected, each filled by a
// memcpy, with their addresses passed to f43.
int caller43() {
// CHECK: define i32 @caller43()
// CHECK: %[[a:.*]] = alloca %struct.s43, align 16
// CHECK: %[[b:.*]] = alloca %struct.s43, align 16
// CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]])
  return f43(3, g43, g43_2);
}
// passing aligned structs on stack
// Nine int arguments precede the two structs; the structs are still expected
// to arrive as pointers, as in f43.
__attribute__ ((noinline))
int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s43_with_align s1, s43_with_align s2) {
// CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2)
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
// Caller side of f43_stack: two 16-byte-aligned temporaries are filled by
// memcpy and their addresses are passed after the nine integer constants.
int caller43_stack() {
// CHECK: define i32 @caller43_stack()
// CHECK: %[[a:.*]] = alloca %struct.s43, align 16
// CHECK: %[[b:.*]] = alloca %struct.s43, align 16
// CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]])
  return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2);
}
    594 
    595 // rdar://13668927
    596 // We should not split argument s1 between registers and stack.
    597 __attribute__ ((noinline))
    598 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
    599               s40_no_align s1, s40_no_align s2) {
    600 // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
    601   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
    602 }
    603 int caller40_split() {
    604 // CHECK: define i32 @caller40_split()
    605 // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
    606   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
    607 }
    608 
    609 __attribute__ ((noinline))
    610 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
    611               s41_with_align s1, s41_with_align s2) {
    612 // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
    613   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
    614 }
// Calls f41_split with seven integer constants followed by the two s41
// globals, each expected to be passed as an i128.
int caller41_split() {
// CHECK: define i32 @caller41_split()
// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
  return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
}
    620 
    621 // Handle homogeneous aggregates properly in variadic functions.
    622 struct HFA {
    623   float a, b, c, d;
    624 };
    625 
    626 float test_hfa(int n, ...) {
    627 // CHECK-LABEL: define float @test_hfa(i32 %n, ...)
    628 // CHECK: [[THELIST:%.*]] = alloca i8*
    629 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
    630 
    631   // HFA is not indirect, so occupies its full 16 bytes on the stack.
    632 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16
    633 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
    634 
    635 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
    636   __builtin_va_list thelist;
    637   __builtin_va_start(thelist, n);
    638   struct HFA h = __builtin_va_arg(thelist, struct HFA);
    639   return h.d;
    640 }
    641 
    642 float test_hfa_call(struct HFA *a) {
    643 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
    644 // CHECK: call float (i32, ...) @test_hfa(i32 1, [4 x float] {{.*}})
    645   test_hfa(1, *a);
    646 }
    647 
// Five floats: one member more than struct HFA.
struct TooBigHFA {
  float a, b, c, d, e;
};

// Pulls one struct TooBigHFA out of the variadic list and returns its d
// member. The list is expected to hold a pointer to the struct (the cursor
// advances by 8 bytes and the slot is loaded as a TooBigHFA pointer).
float test_toobig_hfa(int n, ...) {
// CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]

  // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
  // of stack consumed.
// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA**
// CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]]
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
  return h.d;
}
    669 
// Two 4 x int vectors.
struct HVA {
  int32x4_t a, b;
};

// Pulls one struct HVA out of the variadic list and returns its second vector.
// The list cursor is expected to be rounded up to a 16-byte boundary first and
// then advanced by 32 bytes.
int32x4_t test_hva(int n, ...) {
// CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]

  // HVA is not indirect, so it occupies its full 32 bytes on the stack, but it
  // must be properly aligned (rounded up to a 16-byte boundary).
// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*

// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct HVA h = __builtin_va_arg(thelist, struct HVA);
  return h.b;
}
    695 
// Five 4 x int vectors.
struct TooBigHVA {
  int32x4_t a, b, c, d, e;
};

// Pulls one struct TooBigHVA out of the variadic list and returns its d
// member. The list is expected to hold a pointer to the struct (the cursor
// advances by 8 bytes and the slot is loaded as a TooBigHVA pointer).
int32x4_t test_toobig_hva(int n, ...) {
// CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]

  // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
  // of stack consumed.
// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
// CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
  return h.d;
}
    717 
// Three-element float vector and a struct holding an array of four of them.
typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;
typedef struct { float32x3_t arr[4]; } HFAv3;

// Pulls one HFAv3 out of the variadic list and returns its third array
// element. The list cursor is expected to be rounded up to a 16-byte boundary
// first and then advanced by 64 bytes.
float32x3_t test_hva_v3(int n, ...) {
// CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]

  // HFAv3 is not indirect, so it occupies its full 64 bytes on the stack, but
  // it must be properly aligned (rounded up to a 16-byte boundary).
// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*

// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3*
  __builtin_va_list l;
  __builtin_va_start(l, n);
  HFAv3 r = __builtin_va_arg(l, HFAv3);
  return r.arr[2];
}
    742 
// Passes *a by value through the variadic slot of test_hva_v3 and returns its
// result. At the call site the struct is expected to be passed as
// [4 x <4 x float>].
float32x3_t test_hva_v3_call(HFAv3 *a) {
// CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a)
// CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 1, [4 x <4 x float>] {{.*}})
  return test_hva_v3(1, *a);
}
    748