Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
      2 // FIXME: This test runs at -O3 and checks the optimized shufflevector output; it should be fixed so that it does not depend on the optimizer.
      3 
      4 // Don't include mm_malloc.h, it's system specific.
      5 #define __MM_MALLOC_H
      6 
      7 #include <immintrin.h>
      8 
      9 //
     10 // Test LLVM IR codegen of shuffle instructions
     11 //
     12 
     13 __m256 x(__m256 a, __m256 b) {
     14   // CHECK-LABEL: @x(
     15   // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
     16   return _mm256_shuffle_ps(a, b, 203); // imm 203 = 0b11001011: each 128-bit lane takes a[3], a[2], b[0], b[3]
     17 }
     18 
     19 __m128d test_mm_permute_pd(__m128d a) {
     20   // CHECK-LABEL: @test_mm_permute_pd
     21   // CHECK: shufflevector{{.*}}<i32 1, i32 0>
     22   return _mm_permute_pd(a, 1); // imm 0b01: result = { a[1], a[0] }
     23 }
     24 
     25 __m256d test_mm256_permute_pd(__m256d a) {
     26   // CHECK-LABEL: @test_mm256_permute_pd
     27   // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
     28   return _mm256_permute_pd(a, 5); // imm 0b0101: swap the two doubles within each 128-bit lane
     29 }
     30 
     31 __m128 test_mm_permute_ps(__m128 a) {
     32   // CHECK-LABEL: @test_mm_permute_ps(
     33   // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
     34   return _mm_permute_ps(a, 0x1b); // imm 0b00011011: reverse the four floats
     35 }
     36 
     37 // Test case for PR12401
     38 __m128 test_mm_permute_ps2(__m128 a) {
     39   // CHECK-LABEL: @test_mm_permute_ps2
     40   // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
     41   return _mm_permute_ps(a, 0xe6); // imm 0b11100110: result = { a[2], a[1], a[2], a[3] }
     42 }
     43 
     44 __m256 test_mm256_permute_ps(__m256 a) {
     45   // CHECK-LABEL: @test_mm256_permute_ps
     46   // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     47   return _mm256_permute_ps(a, 0x1b); // imm 0b00011011: reverse the floats within each 128-bit lane
     48 }
     49 
     50 __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
     51   // CHECK-LABEL: @test_mm256_permute2f128_pd
     52   // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
     53   return _mm256_permute2f128_pd(a, b, 0x31); // imm 0x31: low half <- upper half of a, high half <- upper half of b
     54 }
     55 
     56 __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
     57   // CHECK-LABEL: @test_mm256_permute2f128_ps
     58   // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
     59   return _mm256_permute2f128_ps(a, b, 0x13); // imm 0x13: low half <- upper half of b, high half <- upper half of a (mask indices follow the shuffle's operand order, which the pattern leaves unpinned)
     60 }
     61 
     62 __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
     63   // CHECK-LABEL: @test_mm256_permute2f128_si256
     64   // CHECK: shufflevector{{.*}} <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
     65   return _mm256_permute2f128_si256(a, b, 0x20); // imm 0x20: low half <- lower half of a, high half <- lower half of b
     66 }
     67 
     68 __m128
     69 test_mm_broadcast_ss(float const *__a) {
     70   // CHECK-LABEL: @test_mm_broadcast_ss
     71   // CHECK: insertelement <4 x float> {{.*}}, i32 0
     72   // CHECK: insertelement <4 x float> {{.*}}, i32 1
     73   // CHECK: insertelement <4 x float> {{.*}}, i32 2
     74   // CHECK: insertelement <4 x float> {{.*}}, i32 3
     75   return _mm_broadcast_ss(__a); // the patterns above expect one insertelement per lane, indices 0-3, in that order
     76 }
     77 
     78 __m256d
     79 test_mm256_broadcast_sd(double const *__a) {
     80   // CHECK-LABEL: @test_mm256_broadcast_sd
     81   // CHECK: insertelement <4 x double> {{.*}}, i32 0
     82   // CHECK: insertelement <4 x double> {{.*}}, i32 1
     83   // CHECK: insertelement <4 x double> {{.*}}, i32 2
     84   // CHECK: insertelement <4 x double> {{.*}}, i32 3
     85   return _mm256_broadcast_sd(__a); // the patterns above expect one insertelement per lane, indices 0-3, in that order
     86 }
     87 
     88 __m256
     89 test_mm256_broadcast_ss(float const *__a) {
     90   // CHECK-LABEL: @test_mm256_broadcast_ss
     91   // CHECK: insertelement <8 x float> {{.*}}, i32 0
     92   // CHECK: insertelement <8 x float> {{.*}}, i32 1
     93   // CHECK: insertelement <8 x float> {{.*}}, i32 2
     94   // CHECK: insertelement <8 x float> {{.*}}, i32 3
     95   // CHECK: insertelement <8 x float> {{.*}}, i32 4
     96   // CHECK: insertelement <8 x float> {{.*}}, i32 5
     97   // CHECK: insertelement <8 x float> {{.*}}, i32 6
     98   // CHECK: insertelement <8 x float> {{.*}}, i32 7
     99   return _mm256_broadcast_ss(__a); // the patterns above expect one insertelement per lane, indices 0-7, in that order
    100 }
    101 
    102 // Make sure we have the correct mask for each insertf128 case.
    103 
    104 __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
    105   // CHECK-LABEL: @test_mm256_insertf128_ps_0
    106   // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
    107   return _mm256_insertf128_ps(a, b, 0); // index 0: low four lanes come from the second shuffle operand, lanes 4-7 are kept
    108 }
    109 
    110 __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
    111   // CHECK-LABEL: @test_mm256_insertf128_pd_0
    112   // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
    113   return _mm256_insertf128_pd(a, b, 0); // index 0: low two lanes come from the second shuffle operand, lanes 2-3 are kept
    114 }
    115 
    116 __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
    117   // CHECK-LABEL: @test_mm256_insertf128_si256_0
    118   // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
    119   return _mm256_insertf128_si256(a, b, 0); // index 0: four-index mask; low two lanes come from the second shuffle operand, lanes 2-3 are kept
    120 }
    121 
    122 __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
    123   // CHECK-LABEL: @test_mm256_insertf128_ps_1
    124   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    125   return _mm256_insertf128_ps(a, b, 1); // index 1: lanes 0-3 are kept, upper four lanes come from the second shuffle operand
    126 }
    127 
    128 __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
    129   // CHECK-LABEL: @test_mm256_insertf128_pd_1
    130   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
    131   return _mm256_insertf128_pd(a, b, 1); // index 1: lanes 0-1 are kept, upper two lanes come from the second shuffle operand
    132 }
    133 
    134 __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
    135   // CHECK-LABEL: @test_mm256_insertf128_si256_1
    136   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
    137   return _mm256_insertf128_si256(a, b, 1); // index 1: four-index mask; lanes 0-1 are kept, upper two lanes come from the second shuffle operand
    138 }
    139 
    140 // Make sure we have the correct mask for each extractf128 case.
    141 
    142 __m128 test_mm256_extractf128_ps_0(__m256 a) {
    143   // CHECK-LABEL: @test_mm256_extractf128_ps_0
    144   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
    145   return _mm256_extractf128_ps(a, 0); // index 0: expected mask selects lanes 0-3
    146 }
    147 
    148 __m128d test_mm256_extractf128_pd_0(__m256d a) {
    149   // CHECK-LABEL: @test_mm256_extractf128_pd_0
    150   // CHECK: shufflevector{{.*}}<i32 0, i32 1>
    151   return _mm256_extractf128_pd(a, 0); // index 0: expected mask selects lanes 0-1
    152 }
    153 
    154 __m128i test_mm256_extractf128_si256_0(__m256i a) {
    155   // CHECK-LABEL: @test_mm256_extractf128_si256_0
    156   // CHECK: shufflevector{{.*}}<i32 0, i32 1>
    157   return _mm256_extractf128_si256(a, 0); // index 0: two-index mask selecting lanes 0-1
    158 }
    159 
    160 __m128 test_mm256_extractf128_ps_1(__m256 a) {
    161   // CHECK-LABEL: @test_mm256_extractf128_ps_1
    162   // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
    163   return _mm256_extractf128_ps(a, 1); // index 1: expected mask selects lanes 4-7
    164 }
    165 
    166 __m128d test_mm256_extractf128_pd_1(__m256d a) {
    167   // CHECK-LABEL: @test_mm256_extractf128_pd_1
    168   // CHECK: shufflevector{{.*}}<i32 2, i32 3>
    169   return _mm256_extractf128_pd(a, 1); // index 1: expected mask selects lanes 2-3
    170 }
    171 
    172 __m128i test_mm256_extractf128_si256_1(__m256i a) {
    173   // CHECK-LABEL: @test_mm256_extractf128_si256_1
    174   // CHECK: shufflevector{{.*}}<i32 2, i32 3>
    175   return _mm256_extractf128_si256(a, 1); // index 1: two-index mask selecting lanes 2-3
    176 }
    177 
    178 __m256 test_mm256_set_m128(__m128 hi, __m128 lo) {
    179   // CHECK-LABEL: @test_mm256_set_m128
    180   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    181   return _mm256_set_m128(hi, lo); // expected mask 0-7 is a plain concatenation of the two shuffle operands
    182 }
    183 
    184 __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) {
    185   // CHECK-LABEL: @test_mm256_set_m128d
    186   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    187   return _mm256_set_m128d(hi, lo); // NOTE(review): the expected mask has eight indices, so the concat is matched on 8-element vectors; presumably the header routes this through the float variant - confirm against avxintrin.h
    188 }
    189 
    190 __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) {
    191   // CHECK-LABEL: @test_mm256_set_m128i
    192   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    193   return _mm256_set_m128i(hi, lo); // NOTE(review): the expected mask has eight indices, so the concat is matched on 8-element vectors; presumably the header routes this through the float variant - confirm against avxintrin.h
    194 }
    195 
    196 __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) {
    197   // CHECK-LABEL: @test_mm256_setr_m128
    198   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    199   return _mm256_setr_m128(lo, hi); // arguments are passed as (lo, hi), the reverse of this function's parameter order
    200 }
    201 
    202 __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) {
    203   // CHECK-LABEL: @test_mm256_setr_m128d
    204   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    205   return _mm256_setr_m128d(lo, hi); // arguments are passed as (lo, hi), the reverse of this function's parameter order; the expected mask has eight indices
    206 }
    207 
    208 __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) {
    209   // CHECK-LABEL: @test_mm256_setr_m128i
    210   // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    211   return _mm256_setr_m128i(lo, hi); // arguments are passed as (lo, hi), the reverse of this function's parameter order; the expected mask has eight indices
    212 }
    213