// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
#include <stdarg.h>

// This file checks the LLVM IR function signatures and call sequences that
// are emitted for x86-64 argument passing and return values. It is compiled
// three times (no extra target feature, +avx, +avx512f); each run selects a
// different combination of check prefixes.

// Scalar return values: char and short results are marked signext, and
// long double is returned as x86_fp80.

// CHECK-LABEL: define signext i8 @f0()
char f0(void) {
  return 0;
}

// CHECK-LABEL: define signext i16 @f1()
short f1(void) {
  return 0;
}

// CHECK-LABEL: define i32 @f2()
int f2(void) {
  return 0;
}

// CHECK-LABEL: define float @f3()
float f3(void) {
  return 0;
}

// CHECK-LABEL: define double @f4()
double f4(void) {
  return 0;
}

// CHECK-LABEL: define x86_fp80 @f5()
long double f5(void) {
  return 0;
}

// Scalar parameters: char and short arguments are marked signext.
// CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
void f6(char a0, short a1, int a2, long long a3, void *a4) {
}

// An enum parameter is passed as i32.
// CHECK-LABEL: define void @f7(i32 %a0)
typedef enum { A, B, C } e7;
void f7(e7 a0) {
}

// Test merging/passing of upper eightbyte with X87 class.
//
// The union containing a long double is returned through an sret pointer and
// passed byval with 16-byte alignment.
// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
// CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
union u8 {
  long double a;
  int b;
};
union u8 f8_1() { while (1) {} }
void f8_2(union u8 a0) {}

// Two ints followed by a zero-width bit-field are still returned (f9) and
// passed (f10) as a single i64.
// CHECK-LABEL: define i64 @f9()
struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }

// CHECK-LABEL: define void @f10(i64 %a0.coerce)
struct s10 { int a; int b; int : 0; };
void f10(struct s10 a0) {}

// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
union { long double a; float b; } f11() { while (1) {} }

// A struct whose only member is an int aligned to 16 bytes is still
// returned and passed as a plain i32.
// CHECK-LABEL: define i32 @f12_0()
// CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
struct s12 { int a __attribute__((aligned(16))); };
struct s12 f12_0(void) { while (1) {} }
void f12_1(struct s12 a0) {}

// Check that sret parameter is accounted for when checking available integer
// registers.
// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)

struct s13_0 { long long f0[3]; };
struct s13_1 { long long f0[2]; };
struct s13_0 f13(int a, int b, int c, int d,
                 struct s13_1 e, int f) { while (1) {} }

// f14-f17: a trailing argument that follows six int arguments (f14, f15) or
// eight float arguments (f16, f17) keeps its scalar IR type.

// CHECK: define void @f14({{.*}}, i8 signext %X)
void f14(int a, int b, int c, int d, int e, int f, char X) {}

// CHECK: define void @f15({{.*}}, i8* %X)
void f15(int a, int b, int c, int d, int e, int f, void *X) {}

// CHECK: define void @f16({{.*}}, float %X)
void f16(float a, float b, float c, float d, float e, float f, float g, float h,
         float X) {}

// CHECK: define void @f17({{.*}}, x86_fp80 %X)
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
         long double X) {}

// Check for valid coercion. The struct should be passed/returned as i32, not
// as i64 for better code quality.
// rdar://8135035
// CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
struct f18_s0 { int f0; };
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }

// Check byval alignment.

// CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
struct s19 {
  long double a;
};
void f19(struct s19 x) {}

// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
struct __attribute__((aligned(32))) s20 {
  int x;
  int y;
};
void f20(struct s20 x) {}

struct StringRef {
  long x;
  const char *Ptr;
};

// rdar://7375902
// The { long, pointer } struct is split into two arguments, and the pointer
// member keeps its pointer type (i8*).
// CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
const char *f21(struct StringRef S) { return S.x+S.Ptr; }

// PR7567
// The allocas for both parameters of the 16-byte-aligned typedef must have
// alignment 16.
typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
void f22(L x, L y) { }
// CHECK: @f22
// CHECK: %x = alloca{{.*}}, align 16
// CHECK: %y = alloca{{.*}}, align 16



// PR7714
// f23S is passed as an i64 plus an i32 (f23) and returned as { i64, i32 }
// (f24).
struct f23S {
  short f0;
  unsigned f1;
  int f2;
};


void f23(int A, struct f23S B) {
  // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
}

struct f24s { long a; int b; };

struct f23S f24(struct f23S *X, struct f24s *P2) {
  return *X;

  // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
}

// rdar://8248065
// A 16-byte float vector is passed and returned directly as <4 x float>.
// The body must contain exactly one alloca and exactly one store before the
// return.
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 f25(v4f32 X) {
  // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
  // CHECK-NOT: alloca
  // CHECK: alloca <4 x float>
  // CHECK-NOT: alloca
  // CHECK: store <4 x float> %X, <4 x float>*
  // CHECK-NOT: store
  // CHECK: ret <4 x float>
  return X+X;
}

// A struct of two pointers is returned as a pair that keeps both pointer
// types.
struct foo26 {
  int *X;
  float *Y;
};

struct foo26 f26(struct foo26 *P) {
  // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
  return *P;
}


// A struct wrapping a single 128-bit float vector is passed and returned as
// the bare <4 x float> vector.
struct v4f32wrapper {
  v4f32 v;
};

struct v4f32wrapper f27(struct v4f32wrapper X) {
  // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
  return X;
}

// PR22563 - We should unwrap simple structs and arrays to pass
// and return them in the appropriate vector registers if possible.
// The signatures of f27a and f27b are only checked in the runs that select
// the AVX prefix.

typedef float v8f32 __attribute__((__vector_size__(32)));
struct v8f32wrapper {
  v8f32 v;
};

struct v8f32wrapper f27a(struct v8f32wrapper X) {
  // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
  return X;
}

struct v8f32wrapper_wrapper {
  v8f32 v[1];
};

struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
  // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
  return X;
}

// rdar://5711709
// A { double, int } struct is split into a double and an i32 argument.
struct f28c {
  double x;
  int y;
};
void f28(struct f28c C) {
  // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
}

// The same layout nested inside a one-element array member is passed the
// same way as f28c.
struct f29a {
  struct c {
    double x;
    int y;
  } x[1];
};

void f29a(struct f29a A) {
  // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
}

// rdar://8249586
// The three chars after the 8-byte array are passed as an i24.
struct S0 { char f0[8]; char f2; char f3; char f4; };
void f30(struct S0 p_4) {
  // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
}

// Pass the third element as a float when followed by tail padding.
// rdar://8251384
struct f31foo { float a, b, c; };
float f31(struct f31foo X) {
  // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
  return X.c;
}

// _Complex float is passed and returned as <2 x float>.
_Complex float f32(_Complex float A, _Complex float B) {
  // rdar://6379669
  // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
  return A+B;
}


// rdar://8357396
// NOTE(review): f33 has no IR checks attached; presumably it only guards
// against a compiler crash in va_arg lowering -- confirm against the radar.
struct f33s { long x; float c,d; };

void f33(va_list X) {
  va_arg(X, struct f33s);
}

typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));

// One-element 64-bit integer vectors are passed and returned as i64.

// rdar://8359248
// CHECK-LABEL: define i64 @f34(i64 %arg.coerce)
v1i64 f34(v1i64 arg) { return arg; }


// rdar://8358475
// CHECK-LABEL: define i64 @f35(i64 %arg.coerce)
typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
v1i64_2 f35(v1i64_2 arg) { return arg+arg; }

// rdar://9122143
// A struct holding an anonymous union with a long double member is passed
// byval with 16-byte alignment.
// CHECK: declare void @func(%struct._str* byval align 16)
typedef struct _str {
  union {
    long double a;
    long c;
  };
} str;

void func(str s);
str ss;
void f9122143()
{
  func(ss);
}

// A vector of two 32-bit unsigned ints is passed and returned as double.
// CHECK-LABEL: define double @f36(double %arg.coerce)
typedef unsigned v2i32 __attribute((__vector_size__(8)));
v2i32 f36(v2i32 arg) { return arg; }

// 256-bit vectors, bare or wrapped in a struct, are passed directly as
// <8 x float> in the runs using the AVX prefix and byval (align 32) in the
// run using the SSE prefix.
// AVX: declare void @f38(<8 x float>)
// AVX: declare void @f37(<8 x float>)
// SSE: declare void @f38(%struct.s256* byval align 32)
// SSE: declare void @f37(<8 x float>* byval align 32)
typedef float __m256 __attribute__ ((__vector_size__ (32)));
typedef struct {
  __m256 m;
} s256;

s256 x38;
__m256 x37;

void f38(s256 x);
void f37(__m256 x);
void f39() { f38(x38); f37(x37); }

// The next two tests make sure that the struct below is passed
// in the same way regardless of avx being used.

// CHECK: declare void @func40(%struct.t128* byval align 16)
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef struct t128 {
  __m128 m;
  __m128 n;
} two128;

extern void func40(two128 s);
void func41(two128 s) {
  func40(s);
}

// Same as above, but with the two vectors held in an array nested inside
// another struct.
// CHECK: declare void @func42(%struct.t128_2* byval align 16)
typedef struct xxx {
  __m128 array[2];
} Atwo128;
typedef struct t128_2 {
  Atwo128 x;
} SA;

extern void func42(SA s);
void func43(SA s) {
  func42(s);
}

// va_arg of a struct containing a 32-byte vector: the checks require the
// argument address to be rounded up to a multiple of 32 (add 31, then mask
// with -32) before it is converted back to a pointer.
// CHECK-LABEL: define i32 @f44
// CHECK: ptrtoint
// CHECK-NEXT: add i64 %{{[0-9]+}}, 31
// CHECK-NEXT: and i64 %{{[0-9]+}}, -32
// CHECK-NEXT: inttoptr
typedef int T44 __attribute((vector_size(32)));
struct s44 { T44 x; int y; };
int f44(int i, ...) {
  __builtin_va_list ap;
  __builtin_va_start(ap, i);
  struct s44 s = __builtin_va_arg(ap, struct s44);
  __builtin_va_end(ap);
  return s.y;
}

// Test that vec3 returns the correct LLVM IR type.
// AVX-LABEL: define i32 @foo(<3 x i64> %X)
typedef long long3 __attribute((ext_vector_type(3)));
int foo(long3 X)
{
  return 0;
}

// Make sure we don't use a varargs convention for a function without a
// prototype where AVX types are involved.
// AVX: @test45
// AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
int f45();
__m256 x45;
void test45() { f45(x45); }

// Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
// lowering can't handle this case correctly because it runs after legalization.
// CHECK: @test46
// CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
typedef float v46 __attribute((vector_size(8)));
void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }

// Check that we pass the struct below without using byval, which helps out
// codegen.
//
// CHECK: @test47
// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
struct s47 { unsigned a; };
void f47(int,int,int,int,int,int,struct s47);
void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }

// rdar://12723368
// In the following example, there are holes in T4 at the 3rd byte and the 4th
// byte, however, T2 does not have those holes. T4 is chosen to be the
// representing type for union T1, but we can't use load or store of T4 since
// it will skip the 3rd byte and the 4th byte.
// In general, since we don't accurately represent the data fields of a union,
// do not use load or store of the representing llvm type for the union.
typedef _Complex int T2;
typedef _Complex char T5;
typedef _Complex int T7;
typedef struct T4 { T5 field0; T7 field1; } T4;
typedef union T1 { T2 field0; T4 field1; } T1;
extern T1 T1_retval;
T1 test48(void) {
// CHECK: @test48
// CHECK: memcpy
// CHECK: memcpy
  return T1_retval;
}

// Calling a variadic function with a prototype: both doubles are loaded and
// passed directly.
void test49_helper(double, ...);
void test49(double d, double e) {
  test49_helper(d, e);
}
// CHECK-LABEL: define void @test49(
// CHECK:       [[T0:%.*]] = load double, double*
// CHECK-NEXT:  [[T1:%.*]] = load double, double*
// CHECK-NEXT:  call void (double, ...) @test49_helper(double [[T0]], double [[T1]])

// Calling a function declared without a prototype: the callee is bitcast to
// a variadic function type that takes two doubles.
void test50_helper();
void test50(double d, double e) {
  test50_helper(d, e);
}
// CHECK-LABEL: define void @test50(
// CHECK:       [[T0:%.*]] = load double, double*
// CHECK-NEXT:  [[T1:%.*]] = load double, double*
// CHECK-NEXT:  call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])

// va_arg of a struct holding a __uint128_t: the temporary is allocated with
// 16-byte alignment, while the 16-byte copy out of the register save area is
// emitted as a memcpy with alignment 8.
struct test51_s { __uint128_t intval; };
void test51(struct test51_s *s, __builtin_va_list argList) {
  *s = __builtin_va_arg(argList, struct test51_s);
}

// CHECK-LABEL: define void @test51
// CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
// CHECK: br i1
// CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
// CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
// CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
// CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false)
// CHECK-NEXT: add i32 {{.*}}, 16
// CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
// CHECK-NEXT: br label

// A 256-bit vector followed by six doubles and an imaginary constant in a
// variadic call: the last argument is passed as two separate doubles.
void test52_helper(int, ...);
__m256 x52;
void test52() {
  test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})

// va_arg of a 256-bit vector must not emit a conditional branch.
void test53(__m256 *m, __builtin_va_list argList) {
  *m = __builtin_va_arg(argList, __m256);
}
// AVX-LABEL: define void @test53
// AVX-NOT: br i1
// AVX: ret void

// With two leading 256-bit vectors, the trailing imaginary constant is
// passed as two doubles after five double arguments, but as a byval
// { double, double } after six.
void test54_helper(__m256, ...);
__m256 x54;
void test54() {
  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})

typedef float __m512 __attribute__ ((__vector_size__ (64)));
typedef struct {
  __m512 m;
} s512;

s512 x55;
__m512 x56;

// Even on AVX512, aggregates of size larger than four eightbytes have class
// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
//
// CHECK: declare void @f55(%struct.s512* byval align 64)
void f55(s512 x);

// However, __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
void f56(__m512 x);
void f57() { f55(x55); f56(x56); }

// Like for __m128 on AVX, check that the struct below is passed
// in the same way regardless of AVX512 being used.
//
// CHECK: declare void @f58(%struct.t256* byval align 32)
typedef struct t256 {
  __m256 m;
  __m256 n;
} two256;

extern void f58(two256 s);
void f59(two256 s) {
  f58(s);
}

// Same check with the two 256-bit vectors held in an array nested inside
// another struct.
// CHECK: declare void @f60(%struct.sat256* byval align 32)
typedef struct at256 {
  __m256 array[2];
} Atwo256;
typedef struct sat256 {
  Atwo256 x;
} SAtwo256;

extern void f60(SAtwo256 s);
void f61(SAtwo256 s) {
  f60(s);
}

// 512-bit counterpart of test52: the vector is passed directly as
// <16 x float> and the trailing imaginary constant as two doubles.
// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
void f62_helper(int, ...);
__m512 x62;
void f62() {
  f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}

// Like for __m256 on AVX, we always pass __m512 in memory, and don't
// need to use the register save area.
//
// AVX512-LABEL: define void @f63
// AVX512-NOT: br i1
// AVX512: ret void
void f63(__m512 *m, __builtin_va_list argList) {
  *m = __builtin_va_arg(argList, __m512);
}

// 512-bit counterpart of test54: the trailing imaginary constant is passed
// as two doubles after five double arguments, but as a byval
// { double, double } after six.
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
void f64_helper(__m512, ...);
__m512 x64;
void f64() {
  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}