// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of shuffle instructions
//
// Every test opens with a CHECK-LABEL so that its CHECK lines can only match
// inside the IR of the function they belong to.
//

// 256-bit two-source float shuffle with immediate 203 (0xCB).
__m256 x(__m256 a, __m256 b) {
  // The trailing '(' keeps the label from matching any longer symbol name.
  // CHECK-LABEL: @x(
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
  return _mm256_shuffle_ps(a, b, 203);
}

// 128-bit double permute with immediate 1: the two elements are swapped.
__m128d test_mm_permute_pd(__m128d a) {
  // CHECK-LABEL: @test_mm_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0>
  return _mm_permute_pd(a, 1);
}

// 256-bit double permute with immediate 5: elements are swapped pairwise.
__m256d test_mm256_permute_pd(__m256d a) {
  // CHECK-LABEL: @test_mm256_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
  return _mm256_permute_pd(a, 5);
}

// 128-bit float permute with immediate 0x1b: element order is reversed.
__m128 test_mm_permute_ps(__m128 a) {
  // The trailing '(' keeps this label from also matching
  // @test_mm_permute_ps2.
  // CHECK-LABEL: @test_mm_permute_ps(
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
  return _mm_permute_ps(a, 0x1b);
}

// Test case for PR12401
__m128 test_mm_permute_ps2(__m128 a) {
  // CHECK-LABEL: @test_mm_permute_ps2
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
  return _mm_permute_ps(a, 0xe6);
}

// 256-bit float permute with immediate 0x1b: the expected mask reverses each
// group of four elements.
__m256 test_mm256_permute_ps(__m256 a) {
  // CHECK-LABEL: @test_mm256_permute_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  return _mm256_permute_ps(a, 0x1b);
}

__m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
  // CHECK-LABEL: @test_mm256_permute2f128_pd
  // Check that the call is emitted as the vperm2f128.pd intrinsic (the
  // immediate operand itself is not matched).
  // CHECK: @llvm.x86.avx.vperm2f128.pd.256
  return _mm256_permute2f128_pd(a, b, 0x31);
}

__m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
  // CHECK-LABEL: @test_mm256_permute2f128_ps
  // Check that the call is emitted as the vperm2f128.ps intrinsic (the
  // immediate operand itself is not matched).
  // CHECK: @llvm.x86.avx.vperm2f128.ps.256
  return _mm256_permute2f128_ps(a, b, 0x13);
}

__m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
  // CHECK-LABEL: @test_mm256_permute2f128_si256
  // Check that the call is emitted as the vperm2f128.si intrinsic (the
  // immediate operand itself is not matched).
  // CHECK: @llvm.x86.avx.vperm2f128.si.256
  return _mm256_permute2f128_si256(a, b, 0x20);
}

// Broadcast of one float: expect one insertelement per index of the
// <4 x float> result.
__m128 test_mm_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm_broadcast_ss
  // CHECK: insertelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, i32 3
  return _mm_broadcast_ss(__a);
}

// Broadcast of one double: expect one insertelement per index of the
// <4 x double> result.
__m256d test_mm256_broadcast_sd(double const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_sd
  // CHECK: insertelement <4 x double> {{.*}}, i32 0
  // CHECK: insertelement <4 x double> {{.*}}, i32 1
  // CHECK: insertelement <4 x double> {{.*}}, i32 2
  // CHECK: insertelement <4 x double> {{.*}}, i32 3
  return _mm256_broadcast_sd(__a);
}

// Broadcast of one float: expect one insertelement per index of the
// <8 x float> result.
__m256 test_mm256_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_ss
  // CHECK: insertelement <8 x float> {{.*}}, i32 0
  // CHECK: insertelement <8 x float> {{.*}}, i32 1
  // CHECK: insertelement <8 x float> {{.*}}, i32 2
  // CHECK: insertelement <8 x float> {{.*}}, i32 3
  // CHECK: insertelement <8 x float> {{.*}}, i32 4
  // CHECK: insertelement <8 x float> {{.*}}, i32 5
  // CHECK: insertelement <8 x float> {{.*}}, i32 6
  // CHECK: insertelement <8 x float> {{.*}}, i32 7
  return _mm256_broadcast_ss(__a);
}