Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
      2 // RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
      3 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
      4 // RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
      5 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
      6 // RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
      7 #include <stdarg.h>
      8 
      9 // CHECK-LABEL: define signext i8 @f0()
     10 char f0(void) {
     11   return 0;
     12 }
     13 
     14 // CHECK-LABEL: define signext i16 @f1()
     15 short f1(void) {
     16   return 0;
     17 }
     18 
     19 // CHECK-LABEL: define i32 @f2()
     20 int f2(void) {
     21   return 0;
     22 }
     23 
     24 // CHECK-LABEL: define float @f3()
     25 float f3(void) {
     26   return 0;
     27 }
     28 
     29 // CHECK-LABEL: define double @f4()
     30 double f4(void) {
     31   return 0;
     32 }
     33 
     34 // CHECK-LABEL: define x86_fp80 @f5()
     35 long double f5(void) {
     36   return 0;
     37 }
     38 
     39 // CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
     40 void f6(char a0, short a1, int a2, long long a3, void *a4) {
     41 }
     42 
     43 // CHECK-LABEL: define void @f7(i32 %a0)
     44 typedef enum { A, B, C } e7;
     45 void f7(e7 a0) {
     46 }
     47 
     48 // Test merging/passing of upper eightbyte with X87 class.
     49 //
     50 // CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
     51 // CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
     52 union u8 {
     53   long double a;
     54   int b;
     55 };
     56 union u8 f8_1() { while (1) {} }
     57 void f8_2(union u8 a0) {}
     58 
     59 // CHECK-LABEL: define i64 @f9()
        // s9 is two ints followed by a zero-width bit-field. The expected signature
        // returns it directly as a single i64 and takes no hidden result pointer.
        // The body is an infinite loop, so no s9 value is ever built.
     60 struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }
     61 
     62 // CHECK-LABEL: define void @f10(i64 %a0.coerce)
        // s10 has the same members as s9 above, here used as a by-value parameter:
        // the expected signature receives the whole struct coerced into one i64.
     63 struct s10 { int a; int b; int : 0; };
     64 void f10(struct s10 a0) {}
     65 
     66 // CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
     67 union { long double a; float b; } f11() { while (1) {} }
     68 
     69 // CHECK-LABEL: define i32 @f12_0()
     70 // CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
        // s12 holds a single int whose alignment is raised to 16 bytes; the expected
        // signatures still return it (f12_0) and pass it (f12_1) as a plain i32.
     71 struct s12 { int a __attribute__((aligned(16))); };
     72 struct s12 f12_0(void) { while (1) {} }
     73 void f12_1(struct s12 a0) {}
     74 
     75 // Check that sret parameter is accounted for when checking available integer
     76 // registers.
     77 // CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)
     78 
     79 struct s13_0 { long long f0[3]; };
     80 struct s13_1 { long long f0[2]; };
     81 struct s13_0 f13(int a, int b, int c, int d,
     82                  struct s13_1 e, int f) { while (1) {} }
     83 
     84 // CHECK: define void @f14({{.*}}, i8 signext %X)
        // X is the seventh integer argument, following six ints. The pattern leaves
        // the first six parameters unconstrained and only requires that X is still
        // declared as a sign-extended i8.
     85 void f14(int a, int b, int c, int d, int e, int f, char X) {}
     86 
     87 // CHECK: define void @f15({{.*}}, i8* %X)
        // Same shape as f14, with a pointer as the seventh argument: the pattern
        // only requires that X is still declared as an i8*.
     88 void f15(int a, int b, int c, int d, int e, int f, void *X) {}
     89 
     90 // CHECK: define void @f16({{.*}}, float %X)
        // X is the ninth float argument, following eight floats. The pattern leaves
        // the first eight parameters unconstrained and only requires that X is still
        // declared as a plain float.
     91 void f16(float a, float b, float c, float d, float e, float f, float g, float h,
     92          float X) {}
     93 
     94 // CHECK: define void @f17({{.*}}, x86_fp80 %X)
        // Same shape as f16, with a long double after the eight floats: the pattern
        // only requires that X is declared as x86_fp80.
     95 void f17(float a, float b, float c, float d, float e, float f, float g, float h,
     96          long double X) {}
     97 
     98 // Check for valid coercion.  The struct should be passed/returned as i32, not
     99 // as i64 for better code quality.
    100 // rdar://8135035
    101 // CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
    102 struct f18_s0 { int f0; };
    103 void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
    104 
    105 // Check byval alignment.
    106 
    107 // CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
    108 struct s19 {
    109   long double a;
    110 };
    111 void f19(struct s19 x) {}
    112 
    113 // CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
    114 struct __attribute__((aligned(32))) s20 {
    115   int x;
    116   int y;
    117 };
    118 void f20(struct s20 x) {}
    119 
    120 struct StringRef {
    121   long x;
    122   const char *Ptr;
    123 };
    124 
    125 // rdar://7375902
    126 // CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
    127 const char *f21(struct StringRef S) { return S.x+S.Ptr; }
    128 
    129 // PR7567
        // L is a typedef of struct f22s (two unsigned long longs) carrying a 16-byte
        // alignment attribute. The directives after the function expect the local
        // slots for both parameters, %x and %y, to be allocated with align 16.
    130 typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
    131 void f22(L x, L y) { }
    132 // CHECK: @f22
    133 // CHECK: %x = alloca{{.*}}, align 16
    134 // CHECK: %y = alloca{{.*}}, align 16
    135 
    136 
    137 
    138 // PR7714
    139 struct f23S {
    140   short f0;
    141   unsigned f1;
    142   int f2;
    143 };
    144 
    145 
    146 void f23(int A, struct f23S B) {
          // B (a short, an unsigned and an int) is expected to arrive split into two
          // coerced pieces after the plain i32 %A: an i64 followed by an i32.
    147   // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
    148 }
    149 
    150 struct f24s { long a; int b; };
    151 
    152 struct f23S f24(struct f23S *X, struct f24s *P2) {
    153   return *X;
    154 
    155   // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
    156 }
    157 
    158 // rdar://8248065
        // A 16-byte float vector is expected to be passed and returned directly as
        // <4 x float>. The negative checks in the body allow no other alloca before
        // the store of %X, and no further store between that store and the ret.
    159 typedef float v4f32 __attribute__((__vector_size__(16)));
    160 v4f32 f25(v4f32 X) {
    161   // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
    162   // CHECK-NOT: alloca
    163   // CHECK: alloca <4 x float>
    164   // CHECK-NOT: alloca
    165   // CHECK: store <4 x float> %X, <4 x float>*
    166   // CHECK-NOT: store
    167   // CHECK: ret <4 x float>
    168   return X+X;
    169 }
    170 
    171 struct foo26 {
    172   int *X;
    173   float *Y;
    174 };
    175 
    176 struct foo26 f26(struct foo26 *P) {
    177   // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
    178   return *P;
    179 }
    180 
    181 
    182 struct v4f32wrapper {
    183   v4f32 v;
    184 };
    185 
    186 struct v4f32wrapper f27(struct v4f32wrapper X) {
    187   // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
    188   return X;
    189 }
    190 
    191 // PR22563 - We should unwrap simple structs and arrays to pass
    192 // and return them in the appropriate vector registers if possible.
    193 
    194 typedef float v8f32 __attribute__((__vector_size__(32)));
    195 struct v8f32wrapper {
    196   v8f32 v;
    197 };
    198 
    199 struct v8f32wrapper f27a(struct v8f32wrapper X) {
    200   // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
    201   return X;
    202 }
    203 
    204 struct v8f32wrapper_wrapper {
    205   v8f32 v[1];
    206 };
    207 
    208 struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
    209   // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
    210   return X;
    211 }
    212 
    213 // rdar://5711709
        // A struct made of a double followed by an int is expected to be passed as
        // two separate scalar arguments: a double and an i32.
    214 struct f28c {
    215   double x;
    216   int y;
    217 };
    218 void f28(struct f28c C) {
    219   // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
    220 }
    221 
    222 struct f29a {
    223   struct c {
    224     double x;
    225     int y;
    226   } x[1];
    227 };
    228 
    229 void f29a(struct f29a A) {
    230   // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
    231 }
    232 
    233 // rdar://8249586
        // S0 is eleven chars in total: an 8-char array plus three single chars.
        // The expected signature passes it as an i64 followed by an i24, i.e. the
        // second piece is exactly as wide as the three trailing chars.
    234 struct S0 { char f0[8]; char f2; char f3; char f4; };
    235 void f30(struct S0 p_4) {
    236   // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
    237 }
    238 
    239 // Pass the third element as a float when followed by tail padding.
    240 // rdar://8251384
    241 struct f31foo { float a, b, c; };
    242 float f31(struct f31foo X) {
    243   // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
    244   return X.c;
    245 }
    246 
    247 _Complex float f32(_Complex float A, _Complex float B) {
    248   // rdar://6379669
    249   // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
    250   return A+B;
    251 }
    252 
    253 
    254 // rdar://8357396
    255 struct f33s { long x; float c,d; };
    256 
    257 void f33(va_list X) {
          // Pulls one struct f33s (a long followed by two floats) off the va_list and
          // discards it. No FileCheck directives are attached to this function.
          // NOTE(review): presumably it only has to compile without crashing -
          // confirm against the radar referenced above.
    258   va_arg(X, struct f33s);
    259 }
    260 
    261 typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));
    262 
    263 // rdar://8359248
    264 // CHECK-LABEL: define i64 @f34(i64 %arg.coerce)
    265 v1i64 f34(v1i64 arg) { return arg; }
    266 
    267 
    268 // rdar://8358475
    269 // CHECK-LABEL: define i64 @f35(i64 %arg.coerce)
    270 typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
    271 v1i64_2 f35(v1i64_2 arg) { return arg+arg; }
    272 
    273 // rdar://9122143
    274 // CHECK: declare void @func(%struct._str* byval align 16)
    275 typedef struct _str {
    276   union {
    277     long double a;
    278     long c;
    279   };
    280 } str;
    281 
    282 void func(str s);
    283 str ss;
    284 void f9122143()
    285 {
    286   func(ss);
    287 }
    288 
    289 // CHECK-LABEL: define double @f36(double %arg.coerce)
        // v2i32 is an 8-byte vector of unsigned elements; the expected signature
        // passes and returns it coerced to a double, not as an integer or a vector.
    290 typedef unsigned v2i32 __attribute((__vector_size__(8)));
    291 v2i32 f36(v2i32 arg) { return arg; }
    292 
    293 // AVX: declare void @f38(<8 x float>)
    294 // AVX: declare void @f37(<8 x float>)
    295 // SSE: declare void @f38(%struct.s256* byval align 32)
    296 // SSE: declare void @f37(<8 x float>* byval align 32)
        // __m256 is a 32-byte float vector and s256 wraps exactly one of them.
        // Per the directives above: with the avx feature both the bare vector (f37)
        // and the wrapper struct (f38) are expected as a direct <8 x float> argument;
        // in the sse-only run both are expected as byval pointers with align 32.
    297 typedef float __m256 __attribute__ ((__vector_size__ (32)));
    298 typedef struct {
    299   __m256 m;
    300 } s256;
    301 
    302 s256 x38;
    303 __m256 x37;
    304 
    305 void f38(s256 x);
    306 void f37(__m256 x);
    307 void f39() { f38(x38); f37(x37); }
    308 
    309 // The next two tests make sure that the struct below is passed
    310 // in the same way regardless of whether AVX is enabled.
    311 
    312 // CHECK: declare void @func40(%struct.t128* byval align 16)
    313 typedef float __m128 __attribute__ ((__vector_size__ (16)));
    314 typedef struct t128 {
    315   __m128 m;
    316   __m128 n;
    317 } two128;
    318 
    319 extern void func40(two128 s);
    320 void func41(two128 s) {
    321   func40(s);
    322 }
    323 
    324 // CHECK: declare void @func42(%struct.t128_2* byval align 16)
    325 typedef struct xxx {
    326   __m128 array[2];
    327 } Atwo128;
    328 typedef struct t128_2 {
    329   Atwo128 x;
    330 } SA;
    331 
    332 extern void func42(SA s);
    333 void func43(SA s) {
    334   func42(s);
    335 }
    336 
    337 // CHECK-LABEL: define i32 @f44
    338 // CHECK: ptrtoint
    339 // CHECK-NEXT: add i64 %{{[0-9]+}}, 31
    340 // CHECK-NEXT: and i64 %{{[0-9]+}}, -32
    341 // CHECK-NEXT: inttoptr
        // s44 contains a 32-byte int vector and is fetched with va_arg. The
        // directives above expect a pointer to be converted to an integer, rounded
        // up to a multiple of 32 (add 31, then mask with -32) and converted back.
    342 typedef int T44 __attribute((vector_size(32)));
    343 struct s44 { T44 x; int y; };
    344 int f44(int i, ...) {
    345   __builtin_va_list ap;
    346   __builtin_va_start(ap, i);
    347   struct s44 s = __builtin_va_arg(ap, struct s44);
    348   __builtin_va_end(ap);
    349   return s.y;
    350 }
    351 
    352 // Test that vec3 returns the correct LLVM IR type.
    353 // AVX-LABEL: define i32 @foo(<3 x i64> %X)
    354 typedef long long3 __attribute((ext_vector_type(3)));
    355 int foo(long3 X)
    356 {
    357   return 0;
    358 }
    359 
    360 // Make sure we don't use a varargs convention for a function without a
    361 // prototype where AVX types are involved.
    362 // AVX: @test45
    363 // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
    364 int f45();
    365 __m256 x45;
    366 void test45() { f45(x45); }
    367 
    368 // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
    369 // lowering can't handle this case correctly because it runs after legalization.
    370 // CHECK: @test46
    371 // CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
    372 typedef float v46 __attribute((vector_size(8)));
    373 void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
    374 void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
    375 
    376 // Check that we pass the struct below without using byval, which helps out
    377 // codegen.
    378 //
    379 // CHECK: @test47
    380 // CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
    381 struct s47 { unsigned a; };
    382 void f47(int,int,int,int,int,int,struct s47);
    383 void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }
    384 
    385 // rdar://12723368
    386 // In the following example, there are holes in T4 at the 3rd byte and the 4th
    387 // byte, however, T2 does not have those holes. T4 is chosen to be the
    388 // representing type for union T1, but we can't use load or store of T4 since
    389 // it will skip the 3rd byte and the 4th byte.
    390 // In general, since we don't accurately represent the data fields of a union,
    391 // do not use load or store of the representing llvm type for the union.
    392 typedef _Complex int T2;
    393 typedef _Complex char T5;
    394 typedef _Complex int T7;
    395 typedef struct T4 { T5 field0; T7 field1; } T4;
    396 typedef union T1 { T2 field0; T4 field1; } T1;
    397 extern T1 T1_retval;
    398 T1 test48(void) {
    399 // CHECK: @test48
    400 // CHECK: memcpy
    401 // CHECK: memcpy
    402   return T1_retval;
    403 }
    404 
    405 void test49_helper(double, ...);
    406 void test49(double d, double e) {
    407   test49_helper(d, e);
    408 }
        // The helper is variadic with one named double parameter. Both arguments are
        // expected to be loaded as doubles and passed in a call typed (double, ...),
        // with nothing emitted between the two loads and the call.
    409 // CHECK-LABEL:    define void @test49(
    410 // CHECK:      [[T0:%.*]] = load double, double*
    411 // CHECK-NEXT: [[T1:%.*]] = load double, double*
    412 // CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]])
    413 
    414 void test50_helper();
    415 void test50(double d, double e) {
    416   test50_helper(d, e);
    417 }
        // Same call as test49, but the helper is declared without a prototype. The
        // expected call goes through a bitcast of the (...) declaration to
        // void (double, double, ...) and still passes both values as doubles.
    418 // CHECK-LABEL:    define void @test50(
    419 // CHECK:      [[T0:%.*]] = load double, double*
    420 // CHECK-NEXT: [[T1:%.*]] = load double, double*
    421 // CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])
    422 
    423 struct test51_s { __uint128_t intval; };
        // Reads a struct wrapping a single __uint128_t from a va_list and stores it
        // through s. The directives after the function expect a 16-byte memcpy
        // (alignment operand 8) into a temporary allocated with align 16, followed by
        // an i32 add of 16 whose result is stored back, on the path that follows the
        // conditional branch.
    424 void test51(struct test51_s *s, __builtin_va_list argList) {
    425     *s = __builtin_va_arg(argList, struct test51_s);
    426 }
    427 
    428 // CHECK-LABEL: define void @test51
    429 // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
    430 // CHECK: br i1
    431 // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
    432 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
    433 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
    434 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
    435 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
    436 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
    437 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CASTED_TMP_ADDR]], i8* [[RECASTED_VALUE_ADDR]], i64 16, i32 8, i1 false)
    438 // CHECK-NEXT: add i32 {{.*}}, 16
    439 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
    440 // CHECK-NEXT: br label
    441 
    442 void test52_helper(int, ...);
    443 __m256 x52;
    444 void test52() {
    445   test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    446 }
        // Variadic call mixing a 32-byte vector, six double constants and a trailing
        // imaginary constant (1.0i). With the avx feature the expected call passes
        // the vector directly as <8 x float> and lists the final complex argument as
        // two separate double values after the six 1.0 constants.
    447 // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
    448 
    449 void test53(__m256 *m, __builtin_va_list argList) {
    450   *m = __builtin_va_arg(argList, __m256);
    451 }
    452 // AVX-LABEL: define void @test53
    453 // AVX-NOT: br i1
    454 // AVX: ret void
    455 
    456 void test54_helper(__m256, ...);
    457 __m256 x54;
    458 void test54() {
    459   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    460   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    461 }
        // The two calls differ only by one extra 1.0 ahead of the trailing 1.0i.
        // In the first expected call (five 1.0 constants) the complex value appears
        // as two separate double arguments. In the second (six 1.0 constants) it
        // appears instead as a byval pointer to { double, double } with align 8.
    462 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
    463 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
    464 
    465 typedef float __m512 __attribute__ ((__vector_size__ (64)));
    466 typedef struct {
    467   __m512 m;
    468 } s512;
    469 
    470 s512 x55;
    471 __m512 x56;
    472 
    473 // Even on AVX512, aggregates of size larger than four eightbytes have class
    474 // MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
    475 //
    476 // CHECK: declare void @f55(%struct.s512* byval align 64)
    477 void f55(s512 x);
    478 
    479 // However, __m512 has type SSE/SSEUP on AVX512.
    480 //
    481 // AVX512: declare void @f56(<16 x float>)
    482 // NO-AVX512: declare void @f56(<16 x float>* byval align 64)
    483 void f56(__m512 x);
    484 void f57() { f55(x55); f56(x56); }
    485 
    486 // Like for __m128 on AVX, check that the struct below is passed
    487 // in the same way regardless of AVX512 being used.
    488 //
    489 // CHECK: declare void @f58(%struct.t256* byval align 32)
    490 typedef struct t256 {
    491   __m256 m;
    492   __m256 n;
    493 } two256;
    494 
    495 extern void f58(two256 s);
    496 void f59(two256 s) {
    497   f58(s);
    498 }
    499 
    500 // CHECK: declare void @f60(%struct.sat256* byval align 32)
    501 typedef struct at256 {
    502   __m256 array[2];
    503 } Atwo256;
    504 typedef struct sat256 {
    505   Atwo256 x;
    506 } SAtwo256;
    507 
    508 extern void f60(SAtwo256 s);
    509 void f61(SAtwo256 s) {
    510   f60(s);
    511 }
    512 
    513 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
    514 void f62_helper(int, ...);
    515 __m512 x62;
    516 void f62() {
    517   f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    518 }
    519 
    520 // Like for __m256 on AVX, we always pass __m512 in memory, and don't
    521 // need to use the register save area.
    522 //
    523 // AVX512-LABEL: define void @f63
    524 // AVX512-NOT: br i1
    525 // AVX512: ret void
    526 void f63(__m512 *m, __builtin_va_list argList) {
    527   *m = __builtin_va_arg(argList, __m512);
    528 }
    529 
    530 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
    531 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
    532 void f64_helper(__m512, ...);
    533 __m512 x64;
    534 void f64() {
    535   f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    536   f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
    537 }
    538