// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of shuffle instructions
//
// Every test below starts with a FileCheck label naming its own function, so
// a mask pattern can only be satisfied by IR emitted for that function and a
// failure is reported against the right test.
//

// Immediate 203 is 0xCB; the expected mask is spelled out below.
// The label keeps the trailing '(' because the function name is one letter.
__m256 x(__m256 a, __m256 b) {
  // CHECK-LABEL: @x(
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
  return _mm256_shuffle_ps(a, b, 203);
}

// Immediate 1 on a two-element vector: the expected mask swaps the elements.
__m128d test_mm_permute_pd(__m128d a) {
  // CHECK-LABEL: @test_mm_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0>
  return _mm_permute_pd(a, 1);
}

// Immediate 5 (0b0101): the expected mask swaps the elements of each pair.
__m256d test_mm256_permute_pd(__m256d a) {
  // CHECK-LABEL: @test_mm256_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
  return _mm256_permute_pd(a, 5);
}

// Immediate 0x1b holds the 2-bit fields 3, 2, 1, 0 (low to high): the
// expected mask reverses the four elements.
__m128 test_mm_permute_ps(__m128 a) {
  // CHECK-LABEL: @test_mm_permute_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
  return _mm_permute_ps(a, 0x1b);
}

// Test case for PR12401
// Immediate 0xe6 holds the 2-bit fields 2, 1, 2, 3 (low to high).
__m128 test_mm_permute_ps2(__m128 a) {
  // CHECK-LABEL: @test_mm_permute_ps2
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
  return _mm_permute_ps(a, 0xe6);
}

// Same immediate as test_mm_permute_ps; the expected mask applies the
// reversal to elements 0-3 and to elements 4-7 separately.
__m256 test_mm256_permute_ps(__m256 a) {
  // CHECK-LABEL: @test_mm256_permute_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  return _mm256_permute_ps(a, 0x1b);
}

// Immediate 0x31: the expected mask takes elements 2-3 of the first shuffle
// operand followed by elements 2-3 of the second (indices 6-7).
__m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
  // CHECK-LABEL: @test_mm256_permute2f128_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
  return _mm256_permute2f128_pd(a, b, 0x31);
}

// Immediate 0x13: the expected mask is elements 4-7 followed by 12-15.
__m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
  // CHECK-LABEL: @test_mm256_permute2f128_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  return _mm256_permute2f128_ps(a, b, 0x13);
}

// Immediate 0x20: the pattern also pins the shuffle's operand type to
// <8 x i32>; the expected mask is elements 0-3 followed by 8-11.
__m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
  // CHECK-LABEL: @test_mm256_permute2f128_si256
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}} <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  return _mm256_permute2f128_si256(a, b, 0x20);
}

// The broadcast tests expect one insertelement per result element, in index
// order.
__m128
test_mm_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm_broadcast_ss
  // CHECK: insertelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, i32 3
  return _mm_broadcast_ss(__a);
}

__m256d
test_mm256_broadcast_sd(double const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_sd
  // CHECK: insertelement <4 x double> {{.*}}, i32 0
  // CHECK: insertelement <4 x double> {{.*}}, i32 1
  // CHECK: insertelement <4 x double> {{.*}}, i32 2
  // CHECK: insertelement <4 x double> {{.*}}, i32 3
  return _mm256_broadcast_sd(__a);
}

__m256
test_mm256_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_ss
  // CHECK: insertelement <8 x float> {{.*}}, i32 0
  // CHECK: insertelement <8 x float> {{.*}}, i32 1
  // CHECK: insertelement <8 x float> {{.*}}, i32 2
  // CHECK: insertelement <8 x float> {{.*}}, i32 3
  // CHECK: insertelement <8 x float> {{.*}}, i32 4
  // CHECK: insertelement <8 x float> {{.*}}, i32 5
  // CHECK: insertelement <8 x float> {{.*}}, i32 6
  // CHECK: insertelement <8 x float> {{.*}}, i32 7
  return _mm256_broadcast_ss(__a);
}

// Make sure we have the correct mask for each insertf128 case.

// Lane index 0: the expected mask takes indices 8-11 (second shuffle operand)
// followed by indices 4-7 (first shuffle operand).
__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
  // CHECK-LABEL: @test_mm256_insertf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  return _mm256_insertf128_ps(a, b, 0);
}

// Lane index 0, four-element form: indices 4-5 followed by 2-3.
__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
  // CHECK-LABEL: @test_mm256_insertf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
  return _mm256_insertf128_pd(a, b, 0);
}

// Integer variant; the expected mask is the same four-element mask as the
// pd_0 case.
__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
  // CHECK-LABEL: @test_mm256_insertf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
  return _mm256_insertf128_si256(a, b, 0);
}

// Lane index 1: the expected mask takes indices 0-3 (first shuffle operand)
// followed by indices 8-11 (second shuffle operand).
__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
  // CHECK-LABEL: @test_mm256_insertf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  return _mm256_insertf128_ps(a, b, 1);
}

// Lane index 1, four-element form: indices 0-1 followed by 4-5.
__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
  // CHECK-LABEL: @test_mm256_insertf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
  return _mm256_insertf128_pd(a, b, 1);
}

// Integer variant; the expected mask is the same four-element mask as the
// pd_1 case.
__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
  // CHECK-LABEL: @test_mm256_insertf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
  return _mm256_insertf128_si256(a, b, 1);
}

// Make sure we have the correct mask for each extractf128 case.

// Lane index 0: the expected mask selects elements 0-3.
__m128 test_mm256_extractf128_ps_0(__m256 a) {
  // CHECK-LABEL: @test_mm256_extractf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  return _mm256_extractf128_ps(a, 0);
}

// Lane index 0, two-element result: elements 0-1.
__m128d test_mm256_extractf128_pd_0(__m256d a) {
  // CHECK-LABEL: @test_mm256_extractf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
  return _mm256_extractf128_pd(a, 0);
}

// Integer variant; the expected mask is the same two-element mask as the
// pd_0 case.
__m128i test_mm256_extractf128_si256_0(__m256i a) {
  // CHECK-LABEL: @test_mm256_extractf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
  return _mm256_extractf128_si256(a, 0);
}

// Lane index 1: the expected mask selects elements 4-7.
__m128 test_mm256_extractf128_ps_1(__m256 a) {
  // CHECK-LABEL: @test_mm256_extractf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
  return _mm256_extractf128_ps(a, 1);
}

// Lane index 1, two-element result: elements 2-3.
__m128d test_mm256_extractf128_pd_1(__m256d a) {
  // CHECK-LABEL: @test_mm256_extractf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
  return _mm256_extractf128_pd(a, 1);
}

// Integer variant; the expected mask is the same two-element mask as the
// pd_1 case.
__m128i test_mm256_extractf128_si256_1(__m256i a) {
  // CHECK-LABEL: @test_mm256_extractf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
  return _mm256_extractf128_si256(a, 1);
}