// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
// FIXME: This is testing optimized generation of shuffle instructions and should be fixed.

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of shuffle instructions
//
// Every test anchors its mask check to its own function with a label line,
// so that a mask missing from one function cannot be satisfied by the IR of
// a function defined later in the file.
//

// 203 == 0xCB: per 128-bit lane, picks elements 3 and 2 of a, then 0 and 3 of b.
__m256 x(__m256 a, __m256 b) {
  // The trailing paren keeps this one-letter name from matching a longer symbol.
  // CHECK-LABEL: @x(
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
  return _mm256_shuffle_ps(a, b, 203);
}

// Immediate 1 swaps the two doubles.
__m128d test_mm_permute_pd(__m128d a) {
  // CHECK-LABEL: @test_mm_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0>
  return _mm_permute_pd(a, 1);
}

// Immediate 5 (0b0101) swaps the two doubles inside each 128-bit lane.
__m256d test_mm256_permute_pd(__m256d a) {
  // CHECK-LABEL: @test_mm256_permute_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
  return _mm256_permute_pd(a, 5);
}

// Immediate 0x1b reverses the four floats.
__m128 test_mm_permute_ps(__m128 a) {
  // CHECK-LABEL: @test_mm_permute_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
  return _mm_permute_ps(a, 0x1b);
}

// Test case for PR12401
// Immediate 0xe6 decodes to the two-bit selectors 2, 1, 2, 3.
__m128 test_mm_permute_ps2(__m128 a) {
  // CHECK-LABEL: @test_mm_permute_ps2
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
  return _mm_permute_ps(a, 0xe6);
}

// Immediate 0x1b reverses the four floats inside each 128-bit lane.
__m256 test_mm256_permute_ps(__m256 a) {
  // CHECK-LABEL: @test_mm256_permute_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  return _mm256_permute_ps(a, 0x1b);
}

// Immediate 0x31: result is the high half of a followed by the high half of b.
__m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
  // CHECK-LABEL: @test_mm256_permute2f128_pd
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7>
  return _mm256_permute2f128_pd(a, b, 0x31);
}

// Immediate 0x13: both result halves come from the high half of a source.
// NOTE(review): the mask indices are relative to whatever operand order the
// shuffle is emitted with, which the wildcard below does not pin down.
__m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
  // CHECK-LABEL: @test_mm256_permute2f128_ps
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  return _mm256_permute2f128_ps(a, b, 0x13);
}

// Immediate 0x20: result is the low half of a followed by the low half of b.
// The mask has eight indices because the shuffle is checked on <8 x i32>.
__m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
  // Anchor the mask check to this function, like the tests that follow.
  // CHECK-LABEL: @test_mm256_permute2f128_si256
  // Check if the mask is correct
  // CHECK: shufflevector{{.*}} <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  return _mm256_permute2f128_si256(a, b, 0x20);
}

// Broadcast of one float: expects one insertelement per result element (4).
__m128
test_mm_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm_broadcast_ss
  // CHECK: insertelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, i32 3
  return _mm_broadcast_ss(__a);
}

// Broadcast of one double: expects one insertelement per result element (4).
__m256d
test_mm256_broadcast_sd(double const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_sd
  // CHECK: insertelement <4 x double> {{.*}}, i32 0
  // CHECK: insertelement <4 x double> {{.*}}, i32 1
  // CHECK: insertelement <4 x double> {{.*}}, i32 2
  // CHECK: insertelement <4 x double> {{.*}}, i32 3
  return _mm256_broadcast_sd(__a);
}

// Broadcast of one float: expects one insertelement per result element (8).
__m256
test_mm256_broadcast_ss(float const *__a) {
  // CHECK-LABEL: @test_mm256_broadcast_ss
  // CHECK: insertelement <8 x float> {{.*}}, i32 0
  // CHECK: insertelement <8 x float> {{.*}}, i32 1
  // CHECK: insertelement <8 x float> {{.*}}, i32 2
  // CHECK: insertelement <8 x float> {{.*}}, i32 3
  // CHECK: insertelement <8 x float> {{.*}}, i32 4
  // CHECK: insertelement <8 x float> {{.*}}, i32 5
  // CHECK: insertelement <8 x float> {{.*}}, i32 6
  // CHECK: insertelement <8 x float> {{.*}}, i32 7
  return _mm256_broadcast_ss(__a);
}

// Make sure we have the correct mask for each insertf128 case.
__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
  // Index 0: b lands in the low 128 bits, the high 128 bits of a are kept.
  // CHECK-LABEL: @test_mm256_insertf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  const __m256 merged = _mm256_insertf128_ps(a, b, 0);
  return merged;
}

__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
  // Index 0: b lands in the low 128 bits, the high 128 bits of a are kept.
  // CHECK-LABEL: @test_mm256_insertf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
  const __m256d merged = _mm256_insertf128_pd(a, b, 0);
  return merged;
}

__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
  // Index 0: b lands in the low 128 bits, the high 128 bits of a are kept.
  // CHECK-LABEL: @test_mm256_insertf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
  const __m256i merged = _mm256_insertf128_si256(a, b, 0);
  return merged;
}

__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
  // Index 1: the low 128 bits of a are kept, b lands in the high 128 bits.
  // CHECK-LABEL: @test_mm256_insertf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  const __m256 merged = _mm256_insertf128_ps(a, b, 1);
  return merged;
}

__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
  // Index 1: the low 128 bits of a are kept, b lands in the high 128 bits.
  // CHECK-LABEL: @test_mm256_insertf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
  const __m256d merged = _mm256_insertf128_pd(a, b, 1);
  return merged;
}

__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
  // Index 1: the low 128 bits of a are kept, b lands in the high 128 bits.
  // CHECK-LABEL: @test_mm256_insertf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
  const __m256i merged = _mm256_insertf128_si256(a, b, 1);
  return merged;
}

// Make sure we have the correct mask for each extractf128 case.
__m128 test_mm256_extractf128_ps_0(__m256 a) {
  // Index 0 extracts the low 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_ps_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
  const __m128 half = _mm256_extractf128_ps(a, 0);
  return half;
}

__m128d test_mm256_extractf128_pd_0(__m256d a) {
  // Index 0 extracts the low 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_pd_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
  const __m128d half = _mm256_extractf128_pd(a, 0);
  return half;
}

__m128i test_mm256_extractf128_si256_0(__m256i a) {
  // Index 0 extracts the low 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_si256_0
  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
  const __m128i half = _mm256_extractf128_si256(a, 0);
  return half;
}

__m128 test_mm256_extractf128_ps_1(__m256 a) {
  // Index 1 extracts the high 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_ps_1
  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
  const __m128 half = _mm256_extractf128_ps(a, 1);
  return half;
}

__m128d test_mm256_extractf128_pd_1(__m256d a) {
  // Index 1 extracts the high 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_pd_1
  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
  const __m128d half = _mm256_extractf128_pd(a, 1);
  return half;
}

__m128i test_mm256_extractf128_si256_1(__m256i a) {
  // Index 1 extracts the high 128 bits.
  // CHECK-LABEL: @test_mm256_extractf128_si256_1
  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
  const __m128i half = _mm256_extractf128_si256(a, 1);
  return half;
}

// The set/setr tests below all expect the same eight-index identity mask.
// NOTE(review): for the double and integer variants this presumably relies
// on the header building the result through the float form -- confirm
// against the avxintrin.h in use.

__m256 test_mm256_set_m128(__m128 hi, __m128 lo) {
  // CHECK-LABEL: @test_mm256_set_m128
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256 joined = _mm256_set_m128(hi, lo);
  return joined;
}

__m256d test_mm256_set_m128d(__m128d hi, __m128d lo) {
  // CHECK-LABEL: @test_mm256_set_m128d
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256d joined = _mm256_set_m128d(hi, lo);
  return joined;
}

__m256i test_mm256_set_m128i(__m128i hi, __m128i lo) {
  // CHECK-LABEL: @test_mm256_set_m128i
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256i joined = _mm256_set_m128i(hi, lo);
  return joined;
}

// The setr forms are called with the arguments in (lo, hi) order.

__m256 test_mm256_setr_m128(__m128 hi, __m128 lo) {
  // CHECK-LABEL: @test_mm256_setr_m128
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256 joined = _mm256_setr_m128(lo, hi);
  return joined;
}

__m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) {
  // CHECK-LABEL: @test_mm256_setr_m128d
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256d joined = _mm256_setr_m128d(lo, hi);
  return joined;
}

__m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) {
  // CHECK-LABEL: @test_mm256_setr_m128i
  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  const __m256i joined = _mm256_setr_m128i(lo, hi);
  return joined;
}
