// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Each test_* function below wraps a single SSE2 intrinsic. The directives
// inside a function describe the LLVM IR expected for that wrapper. The second
// invocation above repeats the whole file with -fno-signed-char.

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// _mm_add_*: plain vector / scalar additions.

__m128i test_mm_add_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi8
  // CHECK: add <16 x i8>
  return _mm_add_epi8(A, B);
}

__m128i test_mm_add_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi16
  // CHECK: add <8 x i16>
  return _mm_add_epi16(A, B);
}

__m128i test_mm_add_epi32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi32
  // CHECK: add <4 x i32>
  return _mm_add_epi32(A, B);
}

__m128i test_mm_add_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_add_epi64
  // CHECK: add <2 x i64>
  return _mm_add_epi64(A, B);
}

__m128d test_mm_add_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_pd
  // CHECK: fadd <2 x double>
  return _mm_add_pd(A, B);
}

__m128d test_mm_add_sd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_add_sd
  // CHECK: fadd double
  return _mm_add_sd(A, B);
}

// _mm_adds_*: expected to be emitted as calls to target intrinsics.

__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b
  return _mm_adds_epi8(A, B);
}

__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w
  return _mm_adds_epi16(A, B);
}

__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu8
  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b
  return _mm_adds_epu8(A, B);
}

__m128i test_mm_adds_epu16(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_adds_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w
  return _mm_adds_epu16(A, B);
}

__m128d test_mm_and_pd(__m128d A, __m128d B) { 70 // CHECK-LABEL: test_mm_and_pd 71 // CHECK: and <4 x i32> 72 return _mm_and_pd(A, B); 73 } 74 75 __m128i test_mm_and_si128(__m128i A, __m128i B) { 76 // CHECK-LABEL: test_mm_and_si128 77 // CHECK: and <2 x i64> 78 return _mm_and_si128(A, B); 79 } 80 81 __m128i test_mm_avg_epu8(__m128i A, __m128i B) { 82 // CHECK-LABEL: test_mm_avg_epu8 83 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b 84 return _mm_avg_epu8(A, B); 85 } 86 87 __m128i test_mm_avg_epu16(__m128i A, __m128i B) { 88 // CHECK-LABEL: test_mm_avg_epu16 89 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w 90 return _mm_avg_epu16(A, B); 91 } 92 93 __m128i test_mm_bslli_si128(__m128i A) { 94 // CHECK-LABEL: test_mm_bslli_si128 95 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 96 return _mm_bslli_si128(A, 5); 97 } 98 99 __m128i test_mm_bsrli_si128(__m128i A) { 100 // CHECK-LABEL: test_mm_bsrli_si128 101 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 102 return _mm_bsrli_si128(A, 5); 103 } 104 105 void test_mm_clflush(void* A) { 106 // CHECK-LABEL: test_mm_clflush 107 // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}}) 108 _mm_clflush(A); 109 } 110 111 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) { 112 // CHECK-LABEL: test_mm_cmpeq_epi8 113 // CHECK: icmp eq <16 x i8> 114 return _mm_cmpeq_epi8(A, B); 115 } 116 117 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) { 118 // CHECK-LABEL: test_mm_cmpeq_epi16 119 // CHECK: icmp eq <8 x i16> 120 return _mm_cmpeq_epi16(A, B); 121 } 122 123 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) { 124 // CHECK-LABEL: test_mm_cmpeq_epi32 125 // CHECK: icmp eq <4 x i32> 126 return _mm_cmpeq_epi32(A, B); 127 } 128 129 __m128d 
test_mm_cmpeq_pd(__m128d A, __m128d B) { 130 // CHECK-LABEL: test_mm_cmpeq_pd 131 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) 132 return _mm_cmpeq_pd(A, B); 133 } 134 135 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) { 136 // CHECK-LABEL: test_mm_cmpeq_sd 137 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) 138 return _mm_cmpeq_sd(A, B); 139 } 140 141 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) { 142 // CHECK-LABEL: test_mm_cmpge_pd 143 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 144 return _mm_cmpge_pd(A, B); 145 } 146 147 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) { 148 // CHECK-LABEL: test_mm_cmpge_sd 149 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 150 return _mm_cmpge_sd(A, B); 151 } 152 153 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) { 154 // CHECK-LABEL: test_mm_cmpgt_epi8 155 // CHECK: icmp sgt <16 x i8> 156 return _mm_cmpgt_epi8(A, B); 157 } 158 159 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) { 160 // CHECK-LABEL: test_mm_cmpgt_epi16 161 // CHECK: icmp sgt <8 x i16> 162 return _mm_cmpgt_epi16(A, B); 163 } 164 165 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) { 166 // CHECK-LABEL: test_mm_cmpgt_epi32 167 // CHECK: icmp sgt <4 x i32> 168 return _mm_cmpgt_epi32(A, B); 169 } 170 171 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) { 172 // CHECK-LABEL: test_mm_cmpgt_pd 173 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 174 return _mm_cmpgt_pd(A, B); 175 } 176 177 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) { 178 // CHECK-LABEL: test_mm_cmpgt_sd 179 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 180 return _mm_cmpgt_sd(A, B); 181 } 182 183 __m128d test_mm_cmple_pd(__m128d A, __m128d B) { 184 // CHECK-LABEL: 
test_mm_cmple_pd 185 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 186 return _mm_cmple_pd(A, B); 187 } 188 189 __m128d test_mm_cmple_sd(__m128d A, __m128d B) { 190 // CHECK-LABEL: test_mm_cmple_sd 191 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 192 return _mm_cmple_sd(A, B); 193 } 194 195 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) { 196 // CHECK-LABEL: test_mm_cmplt_epi8 197 // CHECK: icmp sgt <16 x i8> 198 return _mm_cmplt_epi8(A, B); 199 } 200 201 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) { 202 // CHECK-LABEL: test_mm_cmplt_epi16 203 // CHECK: icmp sgt <8 x i16> 204 return _mm_cmplt_epi16(A, B); 205 } 206 207 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) { 208 // CHECK-LABEL: test_mm_cmplt_epi32 209 // CHECK: icmp sgt <4 x i32> 210 return _mm_cmplt_epi32(A, B); 211 } 212 213 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) { 214 // CHECK-LABEL: test_mm_cmplt_pd 215 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 216 return _mm_cmplt_pd(A, B); 217 } 218 219 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) { 220 // CHECK-LABEL: test_mm_cmplt_sd 221 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 222 return _mm_cmplt_sd(A, B); 223 } 224 225 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) { 226 // CHECK-LABEL: test_mm_cmpneq_pd 227 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4) 228 return _mm_cmpneq_pd(A, B); 229 } 230 231 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) { 232 // CHECK-LABEL: test_mm_cmpneq_sd 233 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4) 234 return _mm_cmpneq_sd(A, B); 235 } 236 237 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) { 238 // CHECK-LABEL: test_mm_cmpnge_pd 239 // CHECK: call <2 x double> 
@llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 240 return _mm_cmpnge_pd(A, B); 241 } 242 243 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) { 244 // CHECK-LABEL: test_mm_cmpnge_sd 245 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 246 return _mm_cmpnge_sd(A, B); 247 } 248 249 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) { 250 // CHECK-LABEL: test_mm_cmpngt_pd 251 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 252 return _mm_cmpngt_pd(A, B); 253 } 254 255 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) { 256 // CHECK-LABEL: test_mm_cmpngt_sd 257 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 258 return _mm_cmpngt_sd(A, B); 259 } 260 261 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) { 262 // CHECK-LABEL: test_mm_cmpnle_pd 263 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 264 return _mm_cmpnle_pd(A, B); 265 } 266 267 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) { 268 // CHECK-LABEL: test_mm_cmpnle_sd 269 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 270 return _mm_cmpnle_sd(A, B); 271 } 272 273 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) { 274 // CHECK-LABEL: test_mm_cmpnlt_pd 275 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 276 return _mm_cmpnlt_pd(A, B); 277 } 278 279 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) { 280 // CHECK-LABEL: test_mm_cmpnlt_sd 281 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 282 return _mm_cmpnlt_sd(A, B); 283 } 284 285 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) { 286 // CHECK-LABEL: test_mm_cmpord_pd 287 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) 288 return 
_mm_cmpord_pd(A, B); 289 } 290 291 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) { 292 // CHECK-LABEL: test_mm_cmpord_sd 293 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) 294 return _mm_cmpord_sd(A, B); 295 } 296 297 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) { 298 // CHECK-LABEL: test_mm_cmpunord_pd 299 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3) 300 return _mm_cmpunord_pd(A, B); 301 } 302 303 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) { 304 // CHECK-LABEL: test_mm_cmpunord_sd 305 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3) 306 return _mm_cmpunord_sd(A, B); 307 } 308 309 int test_mm_comieq_sd(__m128d A, __m128d B) { 310 // CHECK-LABEL: test_mm_comieq_sd 311 // CHECK: call i32 @llvm.x86.sse2.comieq.sd 312 return _mm_comieq_sd(A, B); 313 } 314 315 int test_mm_comige_sd(__m128d A, __m128d B) { 316 // CHECK-LABEL: test_mm_comige_sd 317 // CHECK: call i32 @llvm.x86.sse2.comige.sd 318 return _mm_comige_sd(A, B); 319 } 320 321 int test_mm_comigt_sd(__m128d A, __m128d B) { 322 // CHECK-LABEL: test_mm_comigt_sd 323 // CHECK: call i32 @llvm.x86.sse2.comigt.sd 324 return _mm_comigt_sd(A, B); 325 } 326 327 int test_mm_comile_sd(__m128d A, __m128d B) { 328 // CHECK-LABEL: test_mm_comile_sd 329 // CHECK: call i32 @llvm.x86.sse2.comile.sd 330 return _mm_comile_sd(A, B); 331 } 332 333 int test_mm_comilt_sd(__m128d A, __m128d B) { 334 // CHECK-LABEL: test_mm_comilt_sd 335 // CHECK: call i32 @llvm.x86.sse2.comilt.sd 336 return _mm_comilt_sd(A, B); 337 } 338 339 int test_mm_comineq_sd(__m128d A, __m128d B) { 340 // CHECK-LABEL: test_mm_comineq_sd 341 // CHECK: call i32 @llvm.x86.sse2.comineq.sd 342 return _mm_comineq_sd(A, B); 343 } 344 345 __m128d test_mm_cvtepi32_pd(__m128i A) { 346 // CHECK-LABEL: test_mm_cvtepi32_pd 347 // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd 348 return 
_mm_cvtepi32_pd(A); 349 } 350 351 __m128 test_mm_cvtepi32_ps(__m128i A) { 352 // CHECK-LABEL: test_mm_cvtepi32_ps 353 // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps 354 return _mm_cvtepi32_ps(A); 355 } 356 357 __m128i test_mm_cvtpd_epi32(__m128d A) { 358 // CHECK-LABEL: test_mm_cvtpd_epi32 359 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq 360 return _mm_cvtpd_epi32(A); 361 } 362 363 __m128 test_mm_cvtpd_ps(__m128d A) { 364 // CHECK-LABEL: test_mm_cvtpd_ps 365 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps 366 return _mm_cvtpd_ps(A); 367 } 368 369 __m128i test_mm_cvtps_epi32(__m128 A) { 370 // CHECK-LABEL: test_mm_cvtps_epi32 371 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq 372 return _mm_cvtps_epi32(A); 373 } 374 375 __m128d test_mm_cvtps_pd(__m128 A) { 376 // CHECK-LABEL: test_mm_cvtps_pd 377 // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd 378 return _mm_cvtps_pd(A); 379 } 380 381 double test_mm_cvtsd_f64(__m128d A) { 382 // CHECK-LABEL: test_mm_cvtsd_f64 383 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 384 return _mm_cvtsd_f64(A); 385 } 386 387 int test_mm_cvtsd_si32(__m128d A) { 388 // CHECK-LABEL: test_mm_cvtsd_si32 389 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si 390 return _mm_cvtsd_si32(A); 391 } 392 393 long long test_mm_cvtsd_si64(__m128d A) { 394 // CHECK-LABEL: test_mm_cvtsd_si64 395 // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64 396 return _mm_cvtsd_si64(A); 397 } 398 399 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { 400 // CHECK-LABEL: test_mm_cvtsd_ss 401 // CHECK: fptrunc double %{{.*}} to float 402 return _mm_cvtsd_ss(A, B); 403 } 404 405 int test_mm_cvtsi128_si32(__m128i A) { 406 // CHECK-LABEL: test_mm_cvtsi128_si32 407 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 408 return _mm_cvtsi128_si32(A); 409 } 410 411 long long test_mm_cvtsi128_si64(__m128i A) { 412 // CHECK-LABEL: test_mm_cvtsi128_si64 413 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0 414 return _mm_cvtsi128_si64(A); 415 } 416 417 __m128d 
test_mm_cvtsi32_sd(__m128d A, int B) { 418 // CHECK-LABEL: test_mm_cvtsi32_sd 419 // CHECK: sitofp i32 %{{.*}} to double 420 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 421 return _mm_cvtsi32_sd(A, B); 422 } 423 424 __m128i test_mm_cvtsi32_si128(int A) { 425 // CHECK-LABEL: test_mm_cvtsi32_si128 426 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 427 return _mm_cvtsi32_si128(A); 428 } 429 430 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) { 431 // CHECK-LABEL: test_mm_cvtsi64_sd 432 // CHECK: sitofp i64 %{{.*}} to double 433 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 434 return _mm_cvtsi64_sd(A, B); 435 } 436 437 __m128i test_mm_cvtsi64_si128(long long A) { 438 // CHECK-LABEL: test_mm_cvtsi64_si128 439 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 440 return _mm_cvtsi64_si128(A); 441 } 442 443 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) { 444 // CHECK-LABEL: test_mm_cvtss_sd 445 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 446 // CHECK: fpext float %{{.*}} to double 447 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 448 return _mm_cvtss_sd(A, B); 449 } 450 451 __m128i test_mm_cvttpd_epi32(__m128d A) { 452 // CHECK-LABEL: test_mm_cvttpd_epi32 453 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq 454 return _mm_cvttpd_epi32(A); 455 } 456 457 __m128i test_mm_cvttps_epi32(__m128 A) { 458 // CHECK-LABEL: test_mm_cvttps_epi32 459 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq 460 return _mm_cvttps_epi32(A); 461 } 462 463 int test_mm_cvttsd_si32(__m128d A) { 464 // CHECK-LABEL: test_mm_cvttsd_si32 465 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 466 // CHECK: fptosi double %{{.*}} to i32 467 return _mm_cvttsd_si32(A); 468 } 469 470 long long test_mm_cvttsd_si64(__m128d A) { 471 // CHECK-LABEL: test_mm_cvttsd_si64 472 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 473 // CHECK: fptosi double %{{.*}} to i64 474 return _mm_cvttsd_si64(A); 475 } 476 
477 __m128d test_mm_div_pd(__m128d A, __m128d B) { 478 // CHECK-LABEL: test_mm_div_pd 479 // CHECK: fdiv <2 x double> 480 return _mm_div_pd(A, B); 481 } 482 483 __m128d test_mm_div_sd(__m128d A, __m128d B) { 484 // CHECK-LABEL: test_mm_div_sd 485 // CHECK: fdiv double 486 return _mm_div_sd(A, B); 487 } 488 489 // Lowering to pextrw requires optimization. 490 int test_mm_extract_epi16(__m128i A) { 491 // CHECK-LABEL: test_mm_extract_epi16 492 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 493 // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] 494 return _mm_extract_epi16(A, 8); 495 } 496 497 __m128i test_mm_insert_epi16(__m128i A, short B) { 498 // CHECK-LABEL: test_mm_insert_epi16 499 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 500 // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]] 501 return _mm_insert_epi16(A, B, 8); 502 } 503 504 void test_mm_lfence() { 505 // CHECK-LABEL: test_mm_lfence 506 // CHECK: call void @llvm.x86.sse2.lfence() 507 _mm_lfence(); 508 } 509 510 __m128d test_mm_load_pd(double const* A) { 511 // CHECK-LABEL: test_mm_load_pd 512 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16 513 return _mm_load_pd(A); 514 } 515 516 __m128d test_mm_load_sd(double const* A) { 517 // CHECK-LABEL: test_mm_load_sd 518 // CHECK: load double, double* %{{.*}}, align 1 519 return _mm_load_sd(A); 520 } 521 522 __m128i test_mm_load_si128(__m128i const* A) { 523 // CHECK-LABEL: test_mm_load_si128 524 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 525 return _mm_load_si128(A); 526 } 527 528 __m128d test_mm_load1_pd(double const* A) { 529 // CHECK-LABEL: test_mm_load1_pd 530 // CHECK: load double, double* %{{.*}}, align 8 531 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 532 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 533 return _mm_load1_pd(A); 534 } 535 536 __m128d test_mm_loadh_pd(__m128d x, void* y) { 537 // CHECK-LABEL: test_mm_loadh_pd 538 // CHECK: load double, double* %{{.*}}, align 1{{$}} 539 return 
_mm_loadh_pd(x, y); 540 } 541 542 __m128d test_mm_loadr_pd(double const* A) { 543 // CHECK-LABEL: test_mm_loadr_pd 544 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16 545 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0> 546 return _mm_loadr_pd(A); 547 } 548 549 __m128d test_mm_loadu_pd(double const* A) { 550 // CHECK-LABEL: test_mm_loadu_pd 551 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1 552 return _mm_loadu_pd(A); 553 } 554 555 __m128i test_mm_loadu_si128(__m128i const* A) { 556 // CHECK-LABEL: test_mm_loadu_si128 557 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1 558 return _mm_loadu_si128(A); 559 } 560 561 __m128i test_mm_madd_epi16(__m128i A, __m128i B) { 562 // CHECK-LABEL: test_mm_madd_epi16 563 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 564 return _mm_madd_epi16(A, B); 565 } 566 567 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) { 568 // CHECK-LABEL: test_mm_maskmoveu_si128 569 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}}) 570 _mm_maskmoveu_si128(A, B, C); 571 } 572 573 __m128i test_mm_max_epi16(__m128i A, __m128i B) { 574 // CHECK-LABEL: test_mm_max_epi16 575 // CHECK: call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 576 return _mm_max_epi16(A, B); 577 } 578 579 __m128i test_mm_max_epu8(__m128i A, __m128i B) { 580 // CHECK-LABEL: test_mm_max_epu8 581 // CHECK: call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 582 return _mm_max_epu8(A, B); 583 } 584 585 __m128d test_mm_max_pd(__m128d A, __m128d B) { 586 // CHECK-LABEL: test_mm_max_pd 587 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 588 return _mm_max_pd(A, B); 589 } 590 591 __m128d test_mm_max_sd(__m128d A, __m128d B) { 592 // CHECK-LABEL: test_mm_max_sd 593 // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> 
%{{.*}}, <2 x double> %{{.*}}) 594 return _mm_max_sd(A, B); 595 } 596 597 void test_mm_mfence() { 598 // CHECK-LABEL: test_mm_mfence 599 // CHECK: call void @llvm.x86.sse2.mfence() 600 _mm_mfence(); 601 } 602 603 __m128i test_mm_min_epi16(__m128i A, __m128i B) { 604 // CHECK-LABEL: test_mm_min_epi16 605 // CHECK: call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 606 return _mm_min_epi16(A, B); 607 } 608 609 __m128i test_mm_min_epu8(__m128i A, __m128i B) { 610 // CHECK-LABEL: test_mm_min_epu8 611 // CHECK: call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 612 return _mm_min_epu8(A, B); 613 } 614 615 __m128d test_mm_min_pd(__m128d A, __m128d B) { 616 // CHECK-LABEL: test_mm_min_pd 617 // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 618 return _mm_min_pd(A, B); 619 } 620 621 __m128d test_mm_min_sd(__m128d A, __m128d B) { 622 // CHECK-LABEL: test_mm_min_sd 623 // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 624 return _mm_min_sd(A, B); 625 } 626 627 int test_mm_movemask_epi8(__m128i A) { 628 // CHECK-LABEL: test_mm_movemask_epi8 629 // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) 630 return _mm_movemask_epi8(A); 631 } 632 633 int test_mm_movemask_pd(__m128d A) { 634 // CHECK-LABEL: test_mm_movemask_pd 635 // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) 636 return _mm_movemask_pd(A); 637 } 638 639 __m128i test_mm_mul_epu32(__m128i A, __m128i B) { 640 // CHECK-LABEL: test_mm_mul_epu32 641 // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 642 return _mm_mul_epu32(A, B); 643 } 644 645 __m128d test_mm_mul_pd(__m128d A, __m128d B) { 646 // CHECK-LABEL: test_mm_mul_pd 647 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} 648 return _mm_mul_pd(A, B); 649 } 650 651 __m128d test_mm_mul_sd(__m128d A, __m128d B) { 652 // CHECK-LABEL: test_mm_mul_sd 653 // CHECK: fmul 
double %{{.*}}, %{{.*}} 654 return _mm_mul_sd(A, B); 655 } 656 657 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) { 658 // CHECK-LABEL: test_mm_mulhi_epi16 659 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 660 return _mm_mulhi_epi16(A, B); 661 } 662 663 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) { 664 // CHECK-LABEL: test_mm_mulhi_epu16 665 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 666 return _mm_mulhi_epu16(A, B); 667 } 668 669 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) { 670 // CHECK-LABEL: test_mm_mullo_epi16 671 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}} 672 return _mm_mullo_epi16(A, B); 673 } 674 675 __m128d test_mm_or_pd(__m128d A, __m128d B) { 676 // CHECK-LABEL: test_mm_or_pd 677 // CHECK: or <4 x i32> %{{.*}}, %{{.*}} 678 return _mm_or_pd(A, B); 679 } 680 681 __m128i test_mm_or_si128(__m128i A, __m128i B) { 682 // CHECK-LABEL: test_mm_or_si128 683 // CHECK: or <2 x i64> %{{.*}}, %{{.*}} 684 return _mm_or_si128(A, B); 685 } 686 687 __m128i test_mm_packs_epi16(__m128i A, __m128i B) { 688 // CHECK-LABEL: test_mm_packs_epi16 689 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 690 return _mm_packs_epi16(A, B); 691 } 692 693 __m128i test_mm_packs_epi32(__m128i A, __m128i B) { 694 // CHECK-LABEL: test_mm_packs_epi32 695 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 696 return _mm_packs_epi32(A, B); 697 } 698 699 __m128i test_mm_packus_epi16(__m128i A, __m128i B) { 700 // CHECK-LABEL: test_mm_packus_epi16 701 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 702 return _mm_packus_epi16(A, B); 703 } 704 705 void test_mm_pause() { 706 // CHECK-LABEL: test_mm_pause 707 // CHECK: call void @llvm.x86.sse2.pause() 708 return _mm_pause(); 709 } 710 711 __m128i test_mm_sad_epu8(__m128i A, __m128i B) { 712 // CHECK-LABEL: test_mm_sad_epu8 713 // 
CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 714 return _mm_sad_epu8(A, B); 715 } 716 717 __m128d test_mm_setzero_pd() { 718 // CHECK-LABEL: test_mm_setzero_pd 719 // CHECK: store <2 x double> zeroinitializer 720 return _mm_setzero_pd(); 721 } 722 723 __m128i test_mm_setzero_si128() { 724 // CHECK-LABEL: test_mm_setzero_si128 725 // CHECK: store <2 x i64> zeroinitializer 726 return _mm_setzero_si128(); 727 } 728 729 __m128i test_mm_shuffle_epi32(__m128i A) { 730 // CHECK-LABEL: test_mm_shuffle_epi32 731 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer 732 return _mm_shuffle_epi32(A, 0); 733 } 734 735 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { 736 // CHECK-LABEL: test_mm_shuffle_pd 737 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2> 738 return _mm_shuffle_pd(A, B, 1); 739 } 740 741 __m128i test_mm_shufflehi_epi16(__m128i A) { 742 // CHECK-LABEL: test_mm_shufflehi_epi16 743 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 744 return _mm_shufflehi_epi16(A, 0); 745 } 746 747 __m128i test_mm_shufflelo_epi16(__m128i A) { 748 // CHECK-LABEL: test_mm_shufflelo_epi16 749 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 750 return _mm_shufflelo_epi16(A, 0); 751 } 752 753 __m128i test_mm_sll_epi16(__m128i A, __m128i B) { 754 // CHECK-LABEL: test_mm_sll_epi16 755 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w 756 return _mm_sll_epi16(A, B); 757 } 758 759 __m128i test_mm_sll_epi32(__m128i A, __m128i B) { 760 // CHECK-LABEL: test_mm_sll_epi32 761 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d 762 return _mm_sll_epi32(A, B); 763 } 764 765 __m128i test_mm_sll_epi64(__m128i A, __m128i B) { 766 // CHECK-LABEL: test_mm_sll_epi64 767 // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q 768 return 
_mm_sll_epi64(A, B); 769 } 770 771 __m128i test_mm_slli_epi16(__m128i A) { 772 // CHECK-LABEL: test_mm_slli_epi16 773 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w 774 return _mm_slli_epi16(A, 1); 775 } 776 777 __m128i test_mm_slli_epi32(__m128i A) { 778 // CHECK-LABEL: test_mm_slli_epi32 779 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d 780 return _mm_slli_epi32(A, 1); 781 } 782 783 __m128i test_mm_slli_epi64(__m128i A) { 784 // CHECK-LABEL: test_mm_slli_epi64 785 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q 786 return _mm_slli_epi64(A, 1); 787 } 788 789 __m128i test_mm_slli_si128(__m128i A) { 790 // CHECK-LABEL: test_mm_slli_si128 791 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 792 return _mm_slli_si128(A, 5); 793 } 794 795 __m128d test_mm_sqrt_pd(__m128d A) { 796 // CHECK-LABEL: test_mm_sqrt_pd 797 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}}) 798 return _mm_sqrt_pd(A); 799 } 800 801 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { 802 // CHECK-LABEL: test_mm_sqrt_sd 803 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}}) 804 return _mm_sqrt_sd(A, B); 805 } 806 807 __m128i test_mm_sra_epi16(__m128i A, __m128i B) { 808 // CHECK-LABEL: test_mm_sra_epi16 809 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w 810 return _mm_sra_epi16(A, B); 811 } 812 813 __m128i test_mm_sra_epi32(__m128i A, __m128i B) { 814 // CHECK-LABEL: test_mm_sra_epi32 815 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d 816 return _mm_sra_epi32(A, B); 817 } 818 819 __m128i test_mm_srai_epi16(__m128i A) { 820 // CHECK-LABEL: test_mm_srai_epi16 821 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w 822 return _mm_srai_epi16(A, 1); 823 } 824 825 __m128i test_mm_srai_epi32(__m128i A) { 826 // CHECK-LABEL: test_mm_srai_epi32 827 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d 828 return 
_mm_srai_epi32(A, 1); 829 } 830 831 __m128i test_mm_srl_epi16(__m128i A, __m128i B) { 832 // CHECK-LABEL: test_mm_srl_epi16 833 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w 834 return _mm_srl_epi16(A, B); 835 } 836 837 __m128i test_mm_srl_epi32(__m128i A, __m128i B) { 838 // CHECK-LABEL: test_mm_srl_epi32 839 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d 840 return _mm_srl_epi32(A, B); 841 } 842 843 __m128i test_mm_srl_epi64(__m128i A, __m128i B) { 844 // CHECK-LABEL: test_mm_srl_epi64 845 // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q 846 return _mm_srl_epi64(A, B); 847 } 848 849 __m128i test_mm_srli_epi16(__m128i A) { 850 // CHECK-LABEL: test_mm_srli_epi16 851 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w 852 return _mm_srli_epi16(A, 1); 853 } 854 855 __m128i test_mm_srli_epi32(__m128i A) { 856 // CHECK-LABEL: test_mm_srli_epi32 857 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d 858 return _mm_srli_epi32(A, 1); 859 } 860 861 __m128i test_mm_srli_epi64(__m128i A) { 862 // CHECK-LABEL: test_mm_srli_epi64 863 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q 864 return _mm_srli_epi64(A, 1); 865 } 866 867 __m128i test_mm_srli_si128(__m128i A) { 868 // CHECK-LABEL: test_mm_srli_si128 869 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 870 return _mm_srli_si128(A, 5); 871 } 872 873 void test_mm_store_pd(double* A, __m128d B) { 874 // CHECK-LABEL: test_mm_store_pd 875 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 876 _mm_store_pd(A, B); 877 } 878 879 void test_mm_store_sd(double* A, __m128d B) { 880 // CHECK-LABEL: test_mm_store_sd 881 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}} 882 _mm_store_sd(A, B); 883 } 884 885 void test_mm_store_si128(__m128i* A, __m128i B) { 886 // CHECK-LABEL: test_mm_store_si128 887 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 888 
_mm_store_si128(A, B); 889 } 890 891 void test_mm_storeh_pd(double* A, __m128d B) { 892 // CHECK-LABEL: test_mm_storeh_pd 893 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1 894 _mm_storeh_pd(A, B); 895 } 896 897 void test_mm_storel_pd(double* A, __m128d B) { 898 // CHECK-LABEL: test_mm_storel_pd 899 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1 900 _mm_storel_pd(A, B); 901 } 902 903 void test_mm_storeu_pd(double* A, __m128d B) { 904 // CHECK-LABEL: test_mm_storeu_pd 905 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1 906 _mm_storeu_pd(A, B); 907 } 908 909 void test_mm_storeu_si128(__m128i* A, __m128i B) { 910 // CHECK-LABEL: test_mm_storeu_si128 911 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1 912 _mm_storeu_si128(A, B); 913 } 914 915 void test_mm_stream_pd(double *A, __m128d B) { 916 // CHECK-LABEL: test_mm_stream_pd 917 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal 918 _mm_stream_pd(A, B); 919 } 920 921 void test_mm_stream_si32(int *A, int B) { 922 // CHECK-LABEL: test_mm_stream_si32 923 // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal 924 _mm_stream_si32(A, B); 925 } 926 927 void test_mm_stream_si64(long long *A, long long B) { 928 // CHECK-LABEL: test_mm_stream_si64 929 // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal 930 _mm_stream_si64(A, B); 931 } 932 933 void test_mm_stream_si128(__m128i *A, __m128i B) { 934 // CHECK-LABEL: test_mm_stream_si128 935 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal 936 _mm_stream_si128(A, B); 937 } 938 939 __m128i test_mm_sub_epi8(__m128i A, __m128i B) { 940 // CHECK-LABEL: test_mm_sub_epi8 941 // CHECK: sub <16 x i8> 942 return _mm_sub_epi8(A, B); 943 } 944 945 __m128i test_mm_sub_epi16(__m128i A, __m128i B) { 946 // CHECK-LABEL: test_mm_sub_epi16 947 // CHECK: sub <8 x i16> 948 return _mm_sub_epi16(A, B); 949 } 950 951 __m128i test_mm_sub_epi32(__m128i A, __m128i B) { 
952 // CHECK-LABEL: test_mm_sub_epi32 953 // CHECK: sub <4 x i32> 954 return _mm_sub_epi32(A, B); 955 } 956 957 __m128i test_mm_sub_epi64(__m128i A, __m128i B) { 958 // CHECK-LABEL: test_mm_sub_epi64 959 // CHECK: sub <2 x i64> 960 return _mm_sub_epi64(A, B); 961 } 962 963 __m128d test_mm_sub_pd(__m128d A, __m128d B) { 964 // CHECK-LABEL: test_mm_sub_pd 965 // CHECK: fsub <2 x double> 966 return _mm_sub_pd(A, B); 967 } 968 969 __m128d test_mm_sub_sd(__m128d A, __m128d B) { 970 // CHECK-LABEL: test_mm_sub_sd 971 // CHECK: fsub double 972 return _mm_sub_sd(A, B); 973 } 974 975 __m128i test_mm_subs_epi8(__m128i A, __m128i B) { 976 // CHECK-LABEL: test_mm_subs_epi8 977 // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b 978 return _mm_subs_epi8(A, B); 979 } 980 981 __m128i test_mm_subs_epi16(__m128i A, __m128i B) { 982 // CHECK-LABEL: test_mm_subs_epi16 983 // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w 984 return _mm_subs_epi16(A, B); 985 } 986 987 __m128i test_mm_subs_epu8(__m128i A, __m128i B) { 988 // CHECK-LABEL: test_mm_subs_epu8 989 // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b 990 return _mm_subs_epu8(A, B); 991 } 992 993 __m128i test_mm_subs_epu16(__m128i A, __m128i B) { 994 // CHECK-LABEL: test_mm_subs_epu16 995 // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w 996 return _mm_subs_epu16(A, B); 997 } 998 999 int test_mm_ucomieq_sd(__m128d A, __m128d B) { 1000 // CHECK-LABEL: test_mm_ucomieq_sd 1001 // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd 1002 return _mm_ucomieq_sd(A, B); 1003 } 1004 1005 int test_mm_ucomige_sd(__m128d A, __m128d B) { 1006 // CHECK-LABEL: test_mm_ucomige_sd 1007 // CHECK: call i32 @llvm.x86.sse2.ucomige.sd 1008 return _mm_ucomige_sd(A, B); 1009 } 1010 1011 int test_mm_ucomigt_sd(__m128d A, __m128d B) { 1012 // CHECK-LABEL: test_mm_ucomigt_sd 1013 // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd 1014 return _mm_ucomigt_sd(A, B); 1015 } 1016 1017 int test_mm_ucomile_sd(__m128d A, __m128d B) { 1018 // CHECK-LABEL: test_mm_ucomile_sd 1019 // CHECK: 
call i32 @llvm.x86.sse2.ucomile.sd 1020 return _mm_ucomile_sd(A, B); 1021 } 1022 1023 int test_mm_ucomilt_sd(__m128d A, __m128d B) { 1024 // CHECK-LABEL: test_mm_ucomilt_sd 1025 // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd 1026 return _mm_ucomilt_sd(A, B); 1027 } 1028 1029 int test_mm_ucomineq_sd(__m128d A, __m128d B) { 1030 // CHECK-LABEL: test_mm_ucomineq_sd 1031 // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd 1032 return _mm_ucomineq_sd(A, B); 1033 } 1034 1035 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) { 1036 // CHECK-LABEL: test_mm_unpackhi_epi8 1037 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1038 return _mm_unpackhi_epi8(A, B); 1039 } 1040 1041 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) { 1042 // CHECK-LABEL: test_mm_unpackhi_epi16 1043 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1044 return _mm_unpackhi_epi16(A, B); 1045 } 1046 1047 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) { 1048 // CHECK-LABEL: test_mm_unpackhi_epi32 1049 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1050 return _mm_unpackhi_epi32(A, B); 1051 } 1052 1053 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) { 1054 // CHECK-LABEL: test_mm_unpackhi_epi64 1055 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3> 1056 return _mm_unpackhi_epi64(A, B); 1057 } 1058 1059 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) { 1060 // CHECK-LABEL: test_mm_unpackhi_pd 1061 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3> 1062 return _mm_unpackhi_pd(A, B); 1063 } 1064 1065 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) { 1066 // CHECK-LABEL: test_mm_unpacklo_epi8 1067 // CHECK: shufflevector <16 x i8> 
%{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1068 return _mm_unpacklo_epi8(A, B); 1069 } 1070 1071 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) { 1072 // CHECK-LABEL: test_mm_unpacklo_epi16 1073 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1074 return _mm_unpacklo_epi16(A, B); 1075 } 1076 1077 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) { 1078 // CHECK-LABEL: test_mm_unpacklo_epi32 1079 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1080 return _mm_unpacklo_epi32(A, B); 1081 } 1082 1083 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) { 1084 // CHECK-LABEL: test_mm_unpacklo_epi64 1085 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2> 1086 return _mm_unpacklo_epi64(A, B); 1087 } 1088 1089 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) { 1090 // CHECK-LABEL: test_mm_unpacklo_pd 1091 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2> 1092 return _mm_unpacklo_pd(A, B); 1093 } 1094 1095 __m128d test_mm_xor_pd(__m128d A, __m128d B) { 1096 // CHECK-LABEL: test_mm_xor_pd 1097 // CHECK: xor <4 x i32> %{{.*}}, %{{.*}} 1098 return _mm_xor_pd(A, B); 1099 } 1100 1101 __m128i test_mm_xor_si128(__m128i A, __m128i B) { 1102 // CHECK-LABEL: test_mm_xor_si128 1103 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} 1104 return _mm_xor_si128(A, B); 1105 } 1106