// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
//
// Exercises clang's LLVM IR lowering of return and argument types for the
// arm64 darwinpcs ABI named in the run line above. Each FileCheck directive
// pins the IR signature (or body fragment) expected for the definition next
// to it. Most functions have empty bodies because only their signatures are
// matched; -w in the run line suppresses the resulting diagnostics.

// CHECK: define signext i8 @f0()
char f0(void) {
  return 0;
}

// Struct as return type. Aggregates <= 16 bytes are passed directly and round
// up to multiple of 8 bytes.
// CHECK: define i64 @f1()
struct s1 { char f0; };
struct s1 f1(void) {}

// CHECK: define i64 @f2()
struct s2 { short f0; };
struct s2 f2(void) {}

// CHECK: define i64 @f3()
struct s3 { int f0; };
struct s3 f3(void) {}

// CHECK: define i64 @f4()
struct s4 { struct s4_0 { int f0; } f0; };
struct s4 f4(void) {}

// CHECK: define i64 @f5()
struct s5 { struct { } f0; int f1; };
struct s5 f5(void) {}

// CHECK: define i64 @f6()
struct s6 { int f0[1]; };
struct s6 f6(void) {}

// CHECK: define void @f7()
struct s7 { struct { int : 0; } f0; };
struct s7 f7(void) {}

// CHECK: define void @f8()
struct s8 { struct { int : 0; } f0[1]; };
struct s8 f8(void) {}

// CHECK: define i64 @f9()
struct s9 { int f0; int : 0; };
struct s9 f9(void) {}

// CHECK: define i64 @f10()
struct s10 { int f0; int : 0; int : 0; };
struct s10 f10(void) {}

// CHECK: define i64 @f11()
struct s11 { int : 0; int f0; };
struct s11 f11(void) {}

// CHECK: define i64 @f12()
union u12 { char f0; short f1; int f2; };
union u12 f12(void) {}

// Homogeneous Aggregate as return type will be passed directly.
59 // CHECK: define %struct.s13 @f13() 60 struct s13 { float f0; }; 61 struct s13 f13(void) {} 62 // CHECK: define %union.u14 @f14() 63 union u14 { float f0; }; 64 union u14 f14(void) {} 65 66 // CHECK: define void @f15() 67 void f15(struct s7 a0) {} 68 69 // CHECK: define void @f16() 70 void f16(struct s8 a0) {} 71 72 // CHECK: define i64 @f17() 73 struct s17 { short f0 : 13; char f1 : 4; }; 74 struct s17 f17(void) {} 75 76 // CHECK: define i64 @f18() 77 struct s18 { short f0; char f1 : 4; }; 78 struct s18 f18(void) {} 79 80 // CHECK: define i64 @f19() 81 struct s19 { int f0; struct s8 f1; }; 82 struct s19 f19(void) {} 83 84 // CHECK: define i64 @f20() 85 struct s20 { struct s8 f1; int f0; }; 86 struct s20 f20(void) {} 87 88 // CHECK: define i64 @f21() 89 struct s21 { struct {} f1; int f0 : 4; }; 90 struct s21 f21(void) {} 91 92 // CHECK: define i64 @f22() 93 // CHECK: define i64 @f23() 94 // CHECK: define i64 @f24() 95 // CHECK: define i128 @f25() 96 // CHECK: define { float, float } @f26() 97 // CHECK: define { double, double } @f27() 98 _Complex char f22(void) {} 99 _Complex short f23(void) {} 100 _Complex int f24(void) {} 101 _Complex long long f25(void) {} 102 _Complex float f26(void) {} 103 _Complex double f27(void) {} 104 105 // CHECK: define i64 @f28() 106 struct s28 { _Complex char f0; }; 107 struct s28 f28() {} 108 109 // CHECK: define i64 @f29() 110 struct s29 { _Complex short f0; }; 111 struct s29 f29() {} 112 113 // CHECK: define i64 @f30() 114 struct s30 { _Complex int f0; }; 115 struct s30 f30() {} 116 117 struct s31 { char x; }; 118 void f31(struct s31 s) { } 119 // CHECK: define void @f31(i64 %s.coerce) 120 // CHECK: %s = alloca %struct.s31, align 8 121 // CHECK: trunc i64 %s.coerce to i8 122 // CHECK: store i8 %{{.*}}, 123 124 struct s32 { double x; }; 125 void f32(struct s32 s) { } 126 // Expand Homogeneous Aggregate. 127 // CHECK: @f32(double %{{.*}}) 128 129 // A composite type larger than 16 bytes should be passed indirectly. 
130 struct s33 { char buf[32*32]; }; 131 void f33(struct s33 s) { } 132 // CHECK: define void @f33(%struct.s33* %s) 133 134 struct s34 { char c; }; 135 void f34(struct s34 s); 136 void g34(struct s34 *s) { f34(*s); } 137 // CHECK: @g34(%struct.s34* %s) 138 // CHECK: %[[a:.*]] = load i8* %{{.*}} 139 // CHECK: zext i8 %[[a]] to i64 140 // CHECK: call void @f34(i64 %{{.*}}) 141 142 /* 143 * Check that va_arg accesses stack according to ABI alignment 144 */ 145 long long t1(int i, ...) { 146 // CHECK: t1 147 __builtin_va_list ap; 148 __builtin_va_start(ap, i); 149 // CHECK-NOT: add i32 %{{.*}} 7 150 // CHECK-NOT: and i32 %{{.*}} -8 151 long long ll = __builtin_va_arg(ap, long long); 152 __builtin_va_end(ap); 153 return ll; 154 } 155 double t2(int i, ...) { 156 // CHECK: t2 157 __builtin_va_list ap; 158 __builtin_va_start(ap, i); 159 // CHECK-NOT: add i32 %{{.*}} 7 160 // CHECK-NOT: and i32 %{{.*}} -8 161 double ll = __builtin_va_arg(ap, double); 162 __builtin_va_end(ap); 163 return ll; 164 } 165 166 #include <arm_neon.h> 167 168 // Homogeneous Vector Aggregate as return type and argument type. 169 // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 170 int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) { 171 return vzipq_s8(a0, a1); 172 } 173 174 // Test direct vector passing. 175 typedef float T_float32x2 __attribute__ ((__vector_size__ (8))); 176 typedef float T_float32x4 __attribute__ ((__vector_size__ (16))); 177 typedef float T_float32x8 __attribute__ ((__vector_size__ (32))); 178 typedef float T_float32x16 __attribute__ ((__vector_size__ (64))); 179 180 // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}}) 181 T_float32x2 f1_0(T_float32x2 a0) { return a0; } 182 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}}) 183 T_float32x4 f1_1(T_float32x4 a0) { return a0; } 184 // Vector with length bigger than 16-byte is illegal and is passed indirectly. 
185 // CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>*) 186 T_float32x8 f1_2(T_float32x8 a0) { return a0; } 187 // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*) 188 T_float32x16 f1_3(T_float32x16 a0) { return a0; } 189 190 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and 191 // aggregates with size > 16 bytes. 192 struct s35 193 { 194 float v[4]; //Testing HFA. 195 } __attribute__((aligned(16))); 196 typedef struct s35 s35_with_align; 197 198 typedef __attribute__((neon_vector_type(4))) float float32x4_t; 199 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { 200 // CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3) 201 // CHECK: %s1 = alloca %struct.s35, align 16 202 // CHECK: %s2 = alloca %struct.s35, align 16 203 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* 204 // CHECK: load <4 x float>* %[[a]], align 16 205 // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>* 206 // CHECK: load <4 x float>* %[[b]], align 16 207 float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, 208 *(float32x4_t *)&s2); 209 return v; 210 } 211 212 struct s36 213 { 214 int v[4]; //Testing 16-byte aggregate. 
215 } __attribute__((aligned(16))); 216 typedef struct s36 s36_with_align; 217 218 typedef __attribute__((neon_vector_type(4))) int int32x4_t; 219 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) { 220 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce) 221 // CHECK: %s1 = alloca %struct.s36, align 16 222 // CHECK: %s2 = alloca %struct.s36, align 16 223 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 224 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 225 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>* 226 // CHECK: load <4 x i32>* %[[a]], align 16 227 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>* 228 // CHECK: load <4 x i32>* %[[b]], align 16 229 int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, 230 *(int32x4_t *)&s2); 231 return v; 232 } 233 234 struct s37 235 { 236 int v[18]; //Testing large aggregate. 237 } __attribute__((aligned(16))); 238 typedef struct s37 s37_with_align; 239 240 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) { 241 // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2) 242 // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>* 243 // CHECK: load <4 x i32>* %[[a]], align 16 244 // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>* 245 // CHECK: load <4 x i32>* %[[b]], align 16 246 int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, 247 *(int32x4_t *)&s2); 248 return v; 249 } 250 s37_with_align g37; 251 int32x4_t caller37() { 252 // CHECK: caller37 253 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16 254 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16 255 // CHECK: call void @llvm.memcpy 256 // CHECK: call void @llvm.memcpy 257 // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]]) 258 return f37(3, g37, g37); 259 } 260 261 // rdar://problem/12648441 262 // Test passing structs with size < 8, < 16 and > 16 263 // with alignment of 16 and without 264 265 // structs with size <= 8 bytes, without alignment 
// attribute: passed as i64. (The aligned(16) variant, s39 below, is expected
// as i128 instead.)
struct s38
{
  int i;
  short s;
};
typedef struct s38 s38_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f38(int i, s38_no_align s1, s38_no_align s2) {
  // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s38, align 8
  // CHECK: %s2 = alloca %struct.s38, align 8
  // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
  // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s38_no_align g38;
s38_no_align g38_2;
int caller38() {
  // CHECK: define i32 @caller38()
  // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
  // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
  // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
  return f38(3, g38, g38_2);
}
// passing structs on stack
// (nine leading int arguments precede the two structs)
__attribute__ ((noinline))
int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s38_no_align s1, s38_no_align s2) {
  // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s38, align 8
  // CHECK: %s2 = alloca %struct.s38, align 8
  // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
  // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller38_stack() {
  // CHECK: define i32 @caller38_stack()
  // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
  // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
  // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
  return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
}

// structs with size <= 8 bytes, with alignment attribute
struct s39
{
  int i;
  short s;
} __attribute__((aligned(16)));
typedef struct s39 s39_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f39(int i, s39_with_align s1, s39_with_align s2) {
  // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s39, align 16
  // CHECK: %s2 = alloca %struct.s39, align 16
  // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
  // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s39_with_align g39;
s39_with_align g39_2;
int caller39() {
  // CHECK: define i32 @caller39()
  // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
  // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
  // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
  return f39(3, g39, g39_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s39_with_align s1, s39_with_align s2) {
  // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s39, align 16
  // CHECK: %s2 = alloca %struct.s39, align 16
  // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
  // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller39_stack() {
  // CHECK: define i32 @caller39_stack()
  // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
  // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
  // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
}

// structs with size <= 16 bytes, without alignment attribute
struct s40
{
  int i;
  short s;
  int i2;
  short s2;
};
typedef struct s40 s40_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f40(int i, s40_no_align s1, s40_no_align s2) {
  // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
  // CHECK: %s1 = alloca %struct.s40, align 8
  // CHECK: %s2 = alloca %struct.s40, align 8
  // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
  // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s40_no_align g40;
s40_no_align g40_2;
int caller40() {
  // CHECK: define i32 @caller40()
  // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
  // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
  // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40(3, g40, g40_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s40_no_align s1, s40_no_align s2) {
  // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
  // CHECK: %s1 = alloca %struct.s40, align 8
  // CHECK: %s2 = alloca %struct.s40, align 8
  // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
  // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller40_stack() {
  // CHECK: define i32 @caller40_stack()
  // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
  // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
  // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
}

// structs with size <= 16 bytes, with alignment attribute
// (expected to be coerced to a single i128)
struct s41
{
  int i;
  short s;
  int i2;
  short s2;
} __attribute__((aligned(16)));
typedef struct s41 s41_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f41(int i, s41_with_align s1, s41_with_align s2) {
  // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s41, align 16
  // CHECK: %s2 = alloca %struct.s41, align 16
  // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
  // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s41_with_align g41;
s41_with_align g41_2;
int caller41() {
  // CHECK: define i32 @caller41()
  // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
  // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
  // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
  return f41(3, g41, g41_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s41_with_align s1, s41_with_align s2) {
  // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
  // CHECK: %s1 = alloca %struct.s41, align 16
  // CHECK: %s2 = alloca %struct.s41, align 16
  // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
  // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
  // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller41_stack() {
  // CHECK: define i32 @caller41_stack()
  // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
  // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
  // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
}

// structs with size > 16 bytes, without alignment attribute
// (expected to be passed by pointer to a caller-made copy)
struct s42
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
};
typedef struct s42 s42_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f42(int i, s42_no_align s1, s42_no_align s2) {
  // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
  // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
  // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
  // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s42_no_align g42;
s42_no_align g42_2;
int caller42() {
  // CHECK: define i32 @caller42()
  // CHECK: %[[a:.*]] = alloca %struct.s42, align 4
  // CHECK: %[[b:.*]] = alloca %struct.s42, align 4
  // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
  // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
  // CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42(3, g42, g42_2);
}
// passing structs on stack
517 __attribute__ ((noinline)) 518 int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, 519 int i9, s42_no_align s1, s42_no_align s2) { 520 // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2) 521 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0 522 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0 523 // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1 524 // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1 525 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; 526 } 527 int caller42_stack() { 528 // CHECK: define i32 @caller42_stack() 529 // CHECK: %[[a:.*]] = alloca %struct.s42, align 4 530 // CHECK: %[[b:.*]] = alloca %struct.s42, align 4 531 // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8* 532 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 533 // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8* 534 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 535 // CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]]) 536 return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2); 537 } 538 539 // structs with size > 16 bytes, with alignment attribute 540 struct s43 541 { 542 int i; 543 short s; 544 int i2; 545 short s2; 546 int i3; 547 short s3; 548 } __attribute__((aligned(16))); 549 typedef struct s43 s43_with_align; 550 // passing aligned structs in registers 551 __attribute__ ((noinline)) 552 int f43(int i, s43_with_align s1, s43_with_align s2) { 553 // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2) 554 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0 555 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0 556 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1 557 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1 
558 return s1.i + s2.i + i + s1.s + s2.s; 559 } 560 s43_with_align g43; 561 s43_with_align g43_2; 562 int caller43() { 563 // CHECK: define i32 @caller43() 564 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 565 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 566 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* 567 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 568 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* 569 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 570 // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]]) 571 return f43(3, g43, g43_2); 572 } 573 // passing aligned structs on stack 574 __attribute__ ((noinline)) 575 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, 576 int i9, s43_with_align s1, s43_with_align s2) { 577 // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2) 578 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0 579 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0 580 // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1 581 // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1 582 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; 583 } 584 int caller43_stack() { 585 // CHECK: define i32 @caller43_stack() 586 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 587 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 588 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* 589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 590 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* 591 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 592 // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]]) 593 return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2); 594 } 595 596 // rdar://13668927 597 // We should not split argument s1 
between registers and stack. 598 __attribute__ ((noinline)) 599 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, 600 s40_no_align s1, s40_no_align s2) { 601 // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) 602 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; 603 } 604 int caller40_split() { 605 // CHECK: define i32 @caller40_split() 606 // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}}) 607 return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2); 608 } 609 610 __attribute__ ((noinline)) 611 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, 612 s41_with_align s1, s41_with_align s2) { 613 // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce) 614 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; 615 } 616 int caller41_split() { 617 // CHECK: define i32 @caller41_split() 618 // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}}) 619 return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2); 620 } 621 622 // Handle homogeneous aggregates properly in variadic functions. 623 struct HFA { 624 float a, b, c, d; 625 }; 626 627 float test_hfa(int n, ...) { 628 // CHECK-LABEL: define float @test_hfa(i32 %n, ...) 629 // CHECK: [[THELIST:%.*]] = alloca i8* 630 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] 631 632 // HFA is not indirect, so occupies its full 16 bytes on the stack. 
633 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 16 634 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 635 636 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA* 637 __builtin_va_list thelist; 638 __builtin_va_start(thelist, n); 639 struct HFA h = __builtin_va_arg(thelist, struct HFA); 640 return h.d; 641 } 642 643 float test_hfa_call(struct HFA *a) { 644 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a) 645 // CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}}) 646 test_hfa(1, *a); 647 } 648 649 struct TooBigHFA { 650 float a, b, c, d, e; 651 }; 652 653 float test_toobig_hfa(int n, ...) { 654 // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...) 655 // CHECK: [[THELIST:%.*]] = alloca i8* 656 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] 657 658 // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes 659 // of stack consumed. 660 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8 661 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 662 663 // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8** 664 // CHECK: [[HFAPTR:%.*]] = load i8** [[HFAPTRPTR]] 665 // CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA* 666 __builtin_va_list thelist; 667 __builtin_va_start(thelist, n); 668 struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA); 669 return h.d; 670 } 671 672 struct HVA { 673 int32x4_t a, b; 674 }; 675 676 int32x4_t test_hva(int n, ...) { 677 // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...) 678 // CHECK: [[THELIST:%.*]] = alloca i8* 679 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] 680 681 // HVA is not indirect, so occupies its full 16 bytes on the stack. but it 682 // must be properly aligned. 
683 // CHECK: [[ALIGN0:%.*]] = getelementptr i8* [[CURLIST]], i32 15 684 // CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64 685 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 686 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* 687 688 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[ALIGNED_LIST]], i32 32 689 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 690 691 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA* 692 __builtin_va_list thelist; 693 __builtin_va_start(thelist, n); 694 struct HVA h = __builtin_va_arg(thelist, struct HVA); 695 return h.b; 696 } 697 698 struct TooBigHVA { 699 int32x4_t a, b, c, d, e; 700 }; 701 702 int32x4_t test_toobig_hva(int n, ...) { 703 // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...) 704 // CHECK: [[THELIST:%.*]] = alloca i8* 705 // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] 706 707 // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes 708 // of stack consumed. 709 // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8 710 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 711 712 // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8** 713 // CHECK: [[HVAPTR:%.*]] = load i8** [[HVAPTRPTR]] 714 // CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA* 715 __builtin_va_list thelist; 716 __builtin_va_start(thelist, n); 717 struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA); 718 return h.d; 719 } 720