// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Code generation test for the SSE4.1 intrinsics exposed by <x86intrin.h>.
//
// Each test_mm_* function wraps exactly one intrinsic. The label directive
// inside it anchors FileCheck to that function's IR, and the pattern that
// follows pins either the generic LLVM instruction or the @llvm.x86.sse41.*
// call the intrinsic is expected to lower to. The file is compiled twice by
// the two lit lines above: once with the default char signedness and once
// with -fno-signed-char.

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// --- Blends -----------------------------------------------------------------
// Immediate blends are expected as a shufflevector; shuffle indices >= the
// lane count select from the second operand.

// Mask 42 (0b00101010): lanes 1, 3 and 5 come from V2 (indices 9, 11, 13).
__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
  // CHECK-LABEL: test_mm_blend_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  return _mm_blend_epi16(V1, V2, 42);
}

// Mask 2 (0b10): lane 1 comes from V2 (index 3).
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
  // CHECK-LABEL: test_mm_blend_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
  return _mm_blend_pd(V1, V2, 2);
}

// Mask 6 (0b0110): lanes 1 and 2 come from V2 (indices 5, 6).
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
  // CHECK-LABEL: test_mm_blend_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  return _mm_blend_ps(V1, V2, 6);
}

// Variable blends (mask in V3) are expected as target intrinsic calls.
__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
  // CHECK-LABEL: test_mm_blendv_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb
  return _mm_blendv_epi8(V1, V2, V3);
}

__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
  // CHECK-LABEL: test_mm_blendv_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd
  return _mm_blendv_pd(V1, V2, V3);
}

__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
  // CHECK-LABEL: test_mm_blendv_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.blendvps
  return _mm_blendv_ps(V1, V2, V3);
}

// --- Ceiling ----------------------------------------------------------------
// The ceil wrappers are expected to reach the same round.* intrinsics as the
// explicit _mm_round_* forms; only the callee name is pinned here, not the
// rounding-mode operand.

__m128d test_mm_ceil_pd(__m128d x) {
  // CHECK-LABEL: test_mm_ceil_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_ceil_pd(x);
}

__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_ceil_ps(x);
}

__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_ceil_sd(x, y);
}

__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_ceil_ss(x, y);
}

// --- Comparison -------------------------------------------------------------

__m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi64
  // CHECK: icmp eq <2 x i64>
  return _mm_cmpeq_epi64(A, B);
}

// --- Sign-extending conversions ---------------------------------------------
// Expected as a plain sext on the narrowed source vector.

__m128i test_mm_cvtepi8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi16
  // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
  return _mm_cvtepi8_epi16(a);
}

__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepi8_epi32(a);
}

__m128i test_mm_cvtepi8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi64
  // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
  return _mm_cvtepi8_epi64(a);
}

__m128i test_mm_cvtepi16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi32
  // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
  return _mm_cvtepi16_epi32(a);
}

__m128i test_mm_cvtepi16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi64
  // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
  return _mm_cvtepi16_epi64(a);
}

__m128i test_mm_cvtepi32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi32_epi64
  // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
  return _mm_cvtepi32_epi64(a);
}

// --- Zero-extending conversions ---------------------------------------------
// Unlike the signed forms above, these are expected as pmovzx* intrinsic
// calls taking the full-width source vector.

__m128i test_mm_cvtepu8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi16(a);
}

__m128i test_mm_cvtepu8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi32(a);
}

__m128i test_mm_cvtepu8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi64(a);
}

__m128i test_mm_cvtepu16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi32(a);
}

__m128i test_mm_cvtepu16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi64(a);
}

__m128i test_mm_cvtepu32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu32_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
  return _mm_cvtepu32_epi64(a);
}

// --- Dot products -----------------------------------------------------------

__m128d test_mm_dp_pd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_dp_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.dppd
  return _mm_dp_pd(x, y, 2);
}

__m128 test_mm_dp_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_dp_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.dpps
  return _mm_dp_ps(x, y, 2);
}

// --- Element extraction -----------------------------------------------------

// Index 16 is one past the last lane of a 16-lane vector, yet the expected
// IR reads lane 0: this case pins the wrap-around of the immediate.
int test_mm_extract_epi8(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi8
  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
  return _mm_extract_epi8(x, 16);
}

int test_mm_extract_epi32(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
  return _mm_extract_epi32(x, 1);
}

long long test_mm_extract_epi64(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
  return _mm_extract_epi64(x, 1);
}

// TODO: _mm_extract_ps has no coverage yet; the draft below is disabled.
// NOTE(review): the draft declares its parameter as __m128i but passes it to
// _mm_add_ps, which operates on __m128 - fix the type when enabling it.
//int test_mm_extract_ps(__m128i x) {
//  return _mm_extract_ps(_mm_add_ps(x,x), 1);
//}

// --- Floor ------------------------------------------------------------------
// As with ceil, only the round.* callee is pinned, not the rounding mode.

__m128d test_mm_floor_pd(__m128d x) {
  // CHECK-LABEL: test_mm_floor_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_floor_pd(x);
}

__m128 test_mm_floor_ps(__m128 x) {
  // CHECK-LABEL: test_mm_floor_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_floor_ps(x);
}

__m128d test_mm_floor_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_floor_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_floor_sd(x, y);
}

__m128 test_mm_floor_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_floor_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_floor_ss(x, y);
}

// --- Element insertion ------------------------------------------------------
// The three integer cases deliberately pass an index equal to the lane count
// (16, 4, 2); the expected IR writes lane 0 in each, pinning the wrap-around
// of the immediate.

__m128i test_mm_insert_epi8(__m128i x, char b) {
  // CHECK-LABEL: test_mm_insert_epi8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
  return _mm_insert_epi8(x, b, 16);
}

__m128i test_mm_insert_epi32(__m128i x, int b) {
  // CHECK-LABEL: test_mm_insert_epi32
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
  return _mm_insert_epi32(x, b, 4);
}

__m128i test_mm_insert_epi64(__m128i x, long long b) {
  // CHECK-LABEL: test_mm_insert_epi64
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
  return _mm_insert_epi64(x, b, 2);
}

__m128 test_mm_insert_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_insert_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.insertps
  return _mm_insert_ps(x, y, 5);
}

// --- Packed maximum ---------------------------------------------------------

__m128i test_mm_max_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb
  return _mm_max_epi8(x, y);
}

__m128i test_mm_max_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw
  return _mm_max_epu16(x, y);
}

__m128i test_mm_max_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd
  return _mm_max_epi32(x, y);
}

248 __m128i test_mm_max_epu32(__m128i x, __m128i y) { 249 // CHECK-LABEL: test_mm_max_epu32 250 // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud 251 return _mm_max_epu32(x, y); 252 } 253 254 __m128i test_mm_min_epi8(__m128i x, __m128i y) { 255 // CHECK-LABEL: test_mm_min_epi8 256 // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb 257 return _mm_min_epi8(x, y); 258 } 259 260 __m128i test_mm_min_epu16(__m128i x, __m128i y) { 261 // CHECK-LABEL: test_mm_min_epu16 262 // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw 263 return _mm_min_epu16(x, y); 264 } 265 266 __m128i test_mm_min_epi32(__m128i x, __m128i y) { 267 // CHECK-LABEL: test_mm_min_epi32 268 // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd 269 return _mm_min_epi32(x, y); 270 } 271 272 __m128i test_mm_min_epu32(__m128i x, __m128i y) { 273 // CHECK-LABEL: test_mm_min_epu32 274 // CHECK: call <4 x i32> @llvm.x86.sse41.pminud 275 return _mm_min_epu32(x, y); 276 } 277 278 __m128i test_mm_minpos_epu16(__m128i x) { 279 // CHECK-LABEL: test_mm_minpos_epu16 280 // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw 281 return _mm_minpos_epu16(x); 282 } 283 284 __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { 285 // CHECK-LABEL: test_mm_mpsadbw_epu8 286 // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw 287 return _mm_mpsadbw_epu8(x, y, 1); 288 } 289 290 __m128i test_mm_mul_epi32(__m128i x, __m128i y) { 291 // CHECK-LABEL: test_mm_mul_epi32 292 // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq 293 return _mm_mul_epi32(x, y); 294 } 295 296 __m128i test_mm_mullo_epi32(__m128i x, __m128i y) { 297 // CHECK-LABEL: test_mm_mullo_epi32 298 // CHECK: mul <4 x i32> 299 return _mm_mullo_epi32(x, y); 300 } 301 302 __m128i test_mm_packus_epi32(__m128i x, __m128i y) { 303 // CHECK-LABEL: test_mm_packus_epi32 304 // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw 305 return _mm_packus_epi32(x, y); 306 } 307 308 __m128d test_mm_round_pd(__m128d x) { 309 // CHECK-LABEL: test_mm_round_pd 310 // CHECK: call <2 x double> @llvm.x86.sse41.round.pd 
311 return _mm_round_pd(x, 2); 312 } 313 314 __m128 test_mm_round_ps(__m128 x) { 315 // CHECK-LABEL: test_mm_round_ps 316 // CHECK: call <4 x float> @llvm.x86.sse41.round.ps 317 return _mm_round_ps(x, 2); 318 } 319 320 __m128d test_mm_round_sd(__m128d x, __m128d y) { 321 // CHECK-LABEL: test_mm_round_sd 322 // CHECK: call <2 x double> @llvm.x86.sse41.round.sd 323 return _mm_round_sd(x, y, 2); 324 } 325 326 __m128 test_mm_round_ss(__m128 x, __m128 y) { 327 // CHECK-LABEL: test_mm_round_ss 328 // CHECK: call <4 x float> @llvm.x86.sse41.round.ss 329 return _mm_round_ss(x, y, 2); 330 } 331 332 __m128i test_mm_stream_load_si128(__m128i const *a) { 333 // CHECK-LABEL: test_mm_stream_load_si128 334 // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa 335 return _mm_stream_load_si128(a); 336 } 337 338 int test_mm_test_all_ones(__m128i x) { 339 // CHECK-LABEL: test_mm_test_all_ones 340 // CHECK: call i32 @llvm.x86.sse41.ptestc 341 return _mm_test_all_ones(x); 342 } 343 344 int test_mm_test_all_zeros(__m128i x, __m128i y) { 345 // CHECK-LABEL: test_mm_test_all_zeros 346 // CHECK: call i32 @llvm.x86.sse41.ptestz 347 return _mm_test_all_zeros(x, y); 348 } 349 350 int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) { 351 // CHECK-LABEL: test_mm_test_mix_ones_zeros 352 // CHECK: call i32 @llvm.x86.sse41.ptestnzc 353 return _mm_test_mix_ones_zeros(x, y); 354 } 355 356 int test_mm_testc_si128(__m128i x, __m128i y) { 357 // CHECK-LABEL: test_mm_testc_si128 358 // CHECK: call i32 @llvm.x86.sse41.ptestc 359 return _mm_testc_si128(x, y); 360 } 361 362 int test_mm_testnzc_si128(__m128i x, __m128i y) { 363 // CHECK-LABEL: test_mm_testnzc_si128 364 // CHECK: call i32 @llvm.x86.sse41.ptestnzc 365 return _mm_testnzc_si128(x, y); 366 } 367 368 int test_mm_testz_si128(__m128i x, __m128i y) { 369 // CHECK-LABEL: test_mm_testz_si128 370 // CHECK: call i32 @llvm.x86.sse41.ptestz 371 return _mm_testz_si128(x, y); 372 } 373