// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s

// Each FileCheck directive below pins the `define` line that clang is expected
// to emit for the function declared next to it. Most functions have empty
// bodies; the directives in this part of the file only match return types.

// CHECK: define signext i8 @f0()
char f0(void) {
  return 0;
}

// Struct as return type. Aggregates <= 16 bytes are passed directly and round
// up to multiple of 8 bytes.
// CHECK: define i64 @f1()
struct s1 { char f0; };
struct s1 f1(void) {}

// CHECK: define i64 @f2()
struct s2 { short f0; };
struct s2 f2(void) {}

// CHECK: define i64 @f3()
struct s3 { int f0; };
struct s3 f3(void) {}

// A struct nested in a struct.
// CHECK: define i64 @f4()
struct s4 { struct s4_0 { int f0; } f0; };
struct s4 f4(void) {}

// An empty struct member ahead of the int.
// CHECK: define i64 @f5()
struct s5 { struct { } f0; int f1; };
struct s5 f5(void) {}

// A one-element array member.
// CHECK: define i64 @f6()
struct s6 { int f0[1]; };
struct s6 f6(void) {}

// Structs whose only content is a zero-width bit-field are expected to be
// returned as void.
// CHECK: define void @f7()
struct s7 { struct { int : 0; } f0; };
struct s7 f7(void) {}

// CHECK: define void @f8()
struct s8 { struct { int : 0; } f0[1]; };
struct s8 f8(void) {}

// Zero-width bit-fields next to a real int member.
// CHECK: define i64 @f9()
struct s9 { int f0; int : 0; };
struct s9 f9(void) {}

// CHECK: define i64 @f10()
struct s10 { int f0; int : 0; int : 0; };
struct s10 f10(void) {}

// CHECK: define i64 @f11()
struct s11 { int : 0; int f0; };
struct s11 f11(void) {}

// CHECK: define i64 @f12()
union u12 { char f0; short f1; int f2; };
union u12 f12(void) {}

// Homogeneous Aggregate as return type will be passed directly.
59 // CHECK: define %struct.s13 @f13() 60 struct s13 { float f0; }; 61 struct s13 f13(void) {} 62 // CHECK: define %union.u14 @f14() 63 union u14 { float f0; }; 64 union u14 f14(void) {} 65 66 // CHECK: define void @f15() 67 void f15(struct s7 a0) {} 68 69 // CHECK: define void @f16() 70 void f16(struct s8 a0) {} 71 72 // CHECK: define i64 @f17() 73 struct s17 { short f0 : 13; char f1 : 4; }; 74 struct s17 f17(void) {} 75 76 // CHECK: define i64 @f18() 77 struct s18 { short f0; char f1 : 4; }; 78 struct s18 f18(void) {} 79 80 // CHECK: define i64 @f19() 81 struct s19 { int f0; struct s8 f1; }; 82 struct s19 f19(void) {} 83 84 // CHECK: define i64 @f20() 85 struct s20 { struct s8 f1; int f0; }; 86 struct s20 f20(void) {} 87 88 // CHECK: define i64 @f21() 89 struct s21 { struct {} f1; int f0 : 4; }; 90 struct s21 f21(void) {} 91 92 // CHECK: define i64 @f22() 93 // CHECK: define i64 @f23() 94 // CHECK: define i64 @f24() 95 // CHECK: define [2 x i64] @f25() 96 // CHECK: define { float, float } @f26() 97 // CHECK: define { double, double } @f27() 98 _Complex char f22(void) {} 99 _Complex short f23(void) {} 100 _Complex int f24(void) {} 101 _Complex long long f25(void) {} 102 _Complex float f26(void) {} 103 _Complex double f27(void) {} 104 105 // CHECK: define i64 @f28() 106 struct s28 { _Complex char f0; }; 107 struct s28 f28() {} 108 109 // CHECK: define i64 @f29() 110 struct s29 { _Complex short f0; }; 111 struct s29 f29() {} 112 113 // CHECK: define i64 @f30() 114 struct s30 { _Complex int f0; }; 115 struct s30 f30() {} 116 117 struct s31 { char x; }; 118 void f31(struct s31 s) { } 119 // CHECK: define void @f31(i64 %s.coerce) 120 // CHECK: %s = alloca %struct.s31, align 1 121 // CHECK: trunc i64 %s.coerce to i8 122 // CHECK: store i8 %{{.*}}, 123 124 struct s32 { double x; }; 125 void f32(struct s32 s) { } 126 // CHECK: @f32([1 x double] %{{.*}}) 127 128 // A composite type larger than 16 bytes should be passed indirectly. 
129 struct s33 { char buf[32*32]; }; 130 void f33(struct s33 s) { } 131 // CHECK: define void @f33(%struct.s33* %s) 132 133 struct s34 { char c; }; 134 void f34(struct s34 s); 135 void g34(struct s34 *s) { f34(*s); } 136 // CHECK: @g34(%struct.s34* %s) 137 // CHECK: %[[a:.*]] = load i8, i8* %{{.*}} 138 // CHECK: zext i8 %[[a]] to i64 139 // CHECK: call void @f34(i64 %{{.*}}) 140 141 /* 142 * Check that va_arg accesses stack according to ABI alignment 143 */ 144 long long t1(int i, ...) { 145 // CHECK: t1 146 __builtin_va_list ap; 147 __builtin_va_start(ap, i); 148 // CHECK-NOT: add i32 %{{.*}} 7 149 // CHECK-NOT: and i32 %{{.*}} -8 150 long long ll = __builtin_va_arg(ap, long long); 151 __builtin_va_end(ap); 152 return ll; 153 } 154 double t2(int i, ...) { 155 // CHECK: t2 156 __builtin_va_list ap; 157 __builtin_va_start(ap, i); 158 // CHECK-NOT: add i32 %{{.*}} 7 159 // CHECK-NOT: and i32 %{{.*}} -8 160 double ll = __builtin_va_arg(ap, double); 161 __builtin_va_end(ap); 162 return ll; 163 } 164 165 #include <arm_neon.h> 166 167 // Homogeneous Vector Aggregate as return type and argument type. 168 // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 169 int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) { 170 return vzipq_s8(a0, a1); 171 } 172 173 // Test direct vector passing. 174 typedef float T_float32x2 __attribute__ ((__vector_size__ (8))); 175 typedef float T_float32x4 __attribute__ ((__vector_size__ (16))); 176 typedef float T_float32x8 __attribute__ ((__vector_size__ (32))); 177 typedef float T_float32x16 __attribute__ ((__vector_size__ (64))); 178 179 // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}}) 180 T_float32x2 f1_0(T_float32x2 a0) { return a0; } 181 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}}) 182 T_float32x4 f1_1(T_float32x4 a0) { return a0; } 183 // Vector with length bigger than 16-byte is illegal and is passed indirectly. 
184 // CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>*) 185 T_float32x8 f1_2(T_float32x8 a0) { return a0; } 186 // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*) 187 T_float32x16 f1_3(T_float32x16 a0) { return a0; } 188 189 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and 190 // aggregates with size > 16 bytes. 191 struct s35 192 { 193 float v[4]; //Testing HFA. 194 } __attribute__((aligned(16))); 195 typedef struct s35 s35_with_align; 196 197 typedef __attribute__((neon_vector_type(4))) float float32x4_t; 198 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { 199 // CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce) 200 // CHECK: %s1 = alloca %struct.s35, align 16 201 // CHECK: %s2 = alloca %struct.s35, align 16 202 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* 203 // CHECK: load <4 x float>, <4 x float>* %[[a]], align 16 204 // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>* 205 // CHECK: load <4 x float>, <4 x float>* %[[b]], align 16 206 float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, 207 *(float32x4_t *)&s2); 208 return v; 209 } 210 211 struct s36 212 { 213 int v[4]; //Testing 16-byte aggregate. 
214 } __attribute__((aligned(16))); 215 typedef struct s36 s36_with_align; 216 217 typedef __attribute__((neon_vector_type(4))) int int32x4_t; 218 int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) { 219 // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce) 220 // CHECK: %s1 = alloca %struct.s36, align 16 221 // CHECK: %s2 = alloca %struct.s36, align 16 222 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16 223 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16 224 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>* 225 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16 226 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>* 227 // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16 228 int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, 229 *(int32x4_t *)&s2); 230 return v; 231 } 232 233 struct s37 234 { 235 int v[18]; //Testing large aggregate. 236 } __attribute__((aligned(16))); 237 typedef struct s37 s37_with_align; 238 239 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) { 240 // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2) 241 // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>* 242 // CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16 243 // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>* 244 // CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16 245 int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, 246 *(int32x4_t *)&s2); 247 return v; 248 } 249 s37_with_align g37; 250 int32x4_t caller37() { 251 // CHECK: caller37 252 // CHECK: %[[a:.*]] = alloca %struct.s37, align 16 253 // CHECK: %[[b:.*]] = alloca %struct.s37, align 16 254 // CHECK: call void @llvm.memcpy 255 // CHECK: call void @llvm.memcpy 256 // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]]) 257 return f37(3, g37, g37); 258 } 259 260 // rdar://problem/12648441 261 // Test passing structs with size < 8, < 16 and > 16 262 // with alignment of 16 and without 263 264 // structs 
// with size <= 8 bytes, without alignment attribute
// passed as i64 (the 16-byte-aligned counterpart, struct s39, is passed as i128)
struct s38
{
  int i;
  short s;
};
typedef struct s38 s38_no_align;
// passing structs in registers
// Returns the sum of i and the i/s members of both structs.
__attribute__ ((noinline))
int f38(int i, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 4
// CHECK: %s2 = alloca %struct.s38, align 4
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s38_no_align g38;
s38_no_align g38_2;
int caller38() {
// CHECK: define i32 @caller38()
// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
  return f38(3, g38, g38_2);
}
// passing structs on stack
// Nine int arguments precede the two structs; returns the sum of all ints and
// the i/s members of both structs.
__attribute__ ((noinline))
int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 4
// CHECK: %s2 = alloca %struct.s38, align 4
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 4
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller38_stack() {
// CHECK: define i32 @caller38_stack()
// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
// CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
  return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
}

// structs with size <= 8 bytes, with alignment attribute
struct s39
{
  int i;
  short s;
} __attribute__((aligned(16)));
typedef struct s39 s39_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f39(int i, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s39_with_align g39;
s39_with_align g39_2;
int caller39() {
// CHECK: define i32 @caller39()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
  return f39(3, g39, g39_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller39_stack() {
// CHECK: define i32 @caller39_stack()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
}

// structs with size <= 16 bytes, without alignment attribute
struct s40
{
  int i;
  short s;
  int i2;
  short s2;
};
typedef struct s40 s40_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f40(int i, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 4
// CHECK: %s2 = alloca %struct.s40, align 4
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s40_no_align g40;
s40_no_align g40_2;
int caller40() {
// CHECK: define i32 @caller40()
// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40(3, g40, g40_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 4
// CHECK: %s2 = alloca %struct.s40, align 4
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 4
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller40_stack() {
// CHECK: define i32 @caller40_stack()
// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
// CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
}

// structs with size <= 16 bytes, with alignment attribute
struct s41
{
  int i;
  short s;
  int i2;
  short s2;
} __attribute__((aligned(16)));
typedef struct s41 s41_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f41(int i, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s41_with_align g41;
s41_with_align g41_2;
int caller41() {
// CHECK: define i32 @caller41()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
  return f41(3, g41, g41_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 16
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 16
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller41_stack() {
// CHECK: define i32 @caller41_stack()
// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
}

// structs with size > 16 bytes, without alignment attribute
struct s42
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
};
typedef struct s42 s42_no_align;
// passing structs in registers
// (the expected signature takes both structs by pointer)
__attribute__ ((noinline))
int f42(int i, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s42_no_align g42;
s42_no_align g42_2;
int caller42() {
// CHECK: define i32 @caller42()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42(3, g42, g42_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller42_stack() {
// CHECK: define i32 @caller42_stack()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
}

// structs
with size > 16 bytes, with alignment attribute 539 struct s43 540 { 541 int i; 542 short s; 543 int i2; 544 short s2; 545 int i3; 546 short s3; 547 } __attribute__((aligned(16))); 548 typedef struct s43 s43_with_align; 549 // passing aligned structs in registers 550 __attribute__ ((noinline)) 551 int f43(int i, s43_with_align s1, s43_with_align s2) { 552 // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2) 553 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0 554 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0 555 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1 556 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1 557 return s1.i + s2.i + i + s1.s + s2.s; 558 } 559 s43_with_align g43; 560 s43_with_align g43_2; 561 int caller43() { 562 // CHECK: define i32 @caller43() 563 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 564 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 565 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* 566 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 567 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* 568 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 569 // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]]) 570 return f43(3, g43, g43_2); 571 } 572 // passing aligned structs on stack 573 __attribute__ ((noinline)) 574 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, 575 int i9, s43_with_align s1, s43_with_align s2) { 576 // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2) 577 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0 578 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0 579 // CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1 580 // CHECK: getelementptr inbounds 
%struct.s43, %struct.s43* %s2, i32 0, i32 1 581 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; 582 } 583 int caller43_stack() { 584 // CHECK: define i32 @caller43_stack() 585 // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 586 // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 587 // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* 588 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 589 // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* 590 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 591 // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]]) 592 return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2); 593 } 594 595 // rdar://13668927 596 // We should not split argument s1 between registers and stack. 597 __attribute__ ((noinline)) 598 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, 599 s40_no_align s1, s40_no_align s2) { 600 // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) 601 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; 602 } 603 int caller40_split() { 604 // CHECK: define i32 @caller40_split() 605 // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}}) 606 return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2); 607 } 608 609 __attribute__ ((noinline)) 610 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, 611 s41_with_align s1, s41_with_align s2) { 612 // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce) 613 return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; 614 } 615 int caller41_split() { 616 // CHECK: define i32 @caller41_split() 617 // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}}) 618 
return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2); 619 } 620 621 // Handle homogeneous aggregates properly in variadic functions. 622 struct HFA { 623 float a, b, c, d; 624 }; 625 626 float test_hfa(int n, ...) { 627 // CHECK-LABEL: define float @test_hfa(i32 %n, ...) 628 // CHECK: [[THELIST:%.*]] = alloca i8* 629 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] 630 631 // HFA is not indirect, so occupies its full 16 bytes on the stack. 632 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16 633 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 634 635 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA* 636 __builtin_va_list thelist; 637 __builtin_va_start(thelist, n); 638 struct HFA h = __builtin_va_arg(thelist, struct HFA); 639 return h.d; 640 } 641 642 float test_hfa_call(struct HFA *a) { 643 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a) 644 // CHECK: call float (i32, ...) @test_hfa(i32 1, [4 x float] {{.*}}) 645 test_hfa(1, *a); 646 } 647 648 struct TooBigHFA { 649 float a, b, c, d, e; 650 }; 651 652 float test_toobig_hfa(int n, ...) { 653 // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...) 654 // CHECK: [[THELIST:%.*]] = alloca i8* 655 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] 656 657 // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes 658 // of stack consumed. 659 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8 660 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 661 662 // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA** 663 // CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]] 664 __builtin_va_list thelist; 665 __builtin_va_start(thelist, n); 666 struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA); 667 return h.d; 668 } 669 670 struct HVA { 671 int32x4_t a, b; 672 }; 673 674 int32x4_t test_hva(int n, ...) 
{ 675 // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...) 676 // CHECK: [[THELIST:%.*]] = alloca i8* 677 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] 678 679 // HVA is not indirect, so occupies its full 16 bytes on the stack. but it 680 // must be properly aligned. 681 // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64 682 // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15 683 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 684 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* 685 686 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32 687 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 688 689 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA* 690 __builtin_va_list thelist; 691 __builtin_va_start(thelist, n); 692 struct HVA h = __builtin_va_arg(thelist, struct HVA); 693 return h.b; 694 } 695 696 struct TooBigHVA { 697 int32x4_t a, b, c, d, e; 698 }; 699 700 int32x4_t test_toobig_hva(int n, ...) { 701 // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...) 702 // CHECK: [[THELIST:%.*]] = alloca i8* 703 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] 704 705 // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes 706 // of stack consumed. 707 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8 708 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 709 710 // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA** 711 // CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]] 712 __builtin_va_list thelist; 713 __builtin_va_start(thelist, n); 714 struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA); 715 return h.d; 716 } 717 718 typedef __attribute__((__ext_vector_type__(3))) float float32x3_t; 719 typedef struct { float32x3_t arr[4]; } HFAv3; 720 721 float32x3_t test_hva_v3(int n, ...) { 722 // CHECK-LABEL: define <3 x float> @test_hva_v3(i32 %n, ...) 
723 // CHECK: [[THELIST:%.*]] = alloca i8* 724 // CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]] 725 726 // HVA is not indirect, so occupies its full 16 bytes on the stack. but it 727 // must be properly aligned. 728 // CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64 729 // CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15 730 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 731 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* 732 733 // CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64 734 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] 735 736 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3* 737 __builtin_va_list l; 738 __builtin_va_start(l, n); 739 HFAv3 r = __builtin_va_arg(l, HFAv3); 740 return r.arr[2]; 741 } 742 743 float32x3_t test_hva_v3_call(HFAv3 *a) { 744 // CHECK-LABEL: define <3 x float> @test_hva_v3_call(%struct.HFAv3* %a) 745 // CHECK: call <3 x float> (i32, ...) @test_hva_v3(i32 1, [4 x <4 x float>] {{.*}}) 746 return test_hva_v3(1, *a); 747 } 748