1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s 2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s 3 4 // Don't include mm_malloc.h, it's system specific. 5 #define __MM_MALLOC_H 6 7 #include <x86intrin.h> 8 9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll 10 11 __m128i test_mm_add_epi8(__m128i A, __m128i B) { 12 // CHECK-LABEL: test_mm_add_epi8 13 // CHECK: add <16 x i8> 14 return _mm_add_epi8(A, B); 15 } 16 17 __m128i test_mm_add_epi16(__m128i A, __m128i B) { 18 // CHECK-LABEL: test_mm_add_epi16 19 // CHECK: add <8 x i16> 20 return _mm_add_epi16(A, B); 21 } 22 23 __m128i test_mm_add_epi32(__m128i A, __m128i B) { 24 // CHECK-LABEL: test_mm_add_epi32 25 // CHECK: add <4 x i32> 26 return _mm_add_epi32(A, B); 27 } 28 29 __m128i test_mm_add_epi64(__m128i A, __m128i B) { 30 // CHECK-LABEL: test_mm_add_epi64 31 // CHECK: add <2 x i64> 32 return _mm_add_epi64(A, B); 33 } 34 35 __m128d test_mm_add_pd(__m128d A, __m128d B) { 36 // CHECK-LABEL: test_mm_add_pd 37 // CHECK: fadd <2 x double> 38 return _mm_add_pd(A, B); 39 } 40 41 __m128d test_mm_add_sd(__m128d A, __m128d B) { 42 // CHECK-LABEL: test_mm_add_sd 43 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 44 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 45 // CHECK: fadd double 46 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 47 return _mm_add_sd(A, B); 48 } 49 50 __m128i test_mm_adds_epi8(__m128i A, __m128i B) { 51 // CHECK-LABEL: test_mm_adds_epi8 52 // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 53 return _mm_adds_epi8(A, B); 54 } 55 56 __m128i test_mm_adds_epi16(__m128i A, __m128i B) { 57 // CHECK-LABEL: test_mm_adds_epi16 58 // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 59 return _mm_adds_epi16(A, B); 60 } 61 62 __m128i test_mm_adds_epu8(__m128i A, __m128i B) { 63 // CHECK-LABEL: test_mm_adds_epu8 64 // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 65 return _mm_adds_epu8(A, B); 66 } 67 68 __m128i test_mm_adds_epu16(__m128i A, __m128i B) { 69 // CHECK-LABEL: test_mm_adds_epu16 70 // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 71 return _mm_adds_epu16(A, B); 72 } 73 74 __m128d test_mm_and_pd(__m128d A, __m128d B) { 75 // CHECK-LABEL: test_mm_and_pd 76 // CHECK: and <4 x i32> 77 return _mm_and_pd(A, B); 78 } 79 80 __m128i test_mm_and_si128(__m128i A, __m128i B) { 81 // CHECK-LABEL: test_mm_and_si128 82 // CHECK: and <2 x i64> 83 return _mm_and_si128(A, B); 84 } 85 86 __m128d test_mm_andnot_pd(__m128d A, __m128d B) { 87 // CHECK-LABEL: test_mm_andnot_pd 88 // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1> 89 // CHECK: and <4 x i32> 90 return _mm_andnot_pd(A, B); 91 } 92 93 __m128i test_mm_andnot_si128(__m128i A, __m128i B) { 94 // CHECK-LABEL: test_mm_andnot_si128 95 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1> 96 // CHECK: and <2 x i64> 97 return _mm_andnot_si128(A, B); 98 } 99 100 __m128i test_mm_avg_epu8(__m128i A, __m128i B) { 101 // CHECK-LABEL: test_mm_avg_epu8 102 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 103 return _mm_avg_epu8(A, B); 104 } 105 106 __m128i test_mm_avg_epu16(__m128i A, __m128i B) { 107 // CHECK-LABEL: test_mm_avg_epu16 108 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 109 return _mm_avg_epu16(A, B); 110 } 111 112 __m128i test_mm_bslli_si128(__m128i A) { 113 // CHECK-LABEL: test_mm_bslli_si128 114 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 115 return _mm_bslli_si128(A, 5); 116 } 117 118 __m128i test_mm_bsrli_si128(__m128i A) { 119 // CHECK-LABEL: test_mm_bsrli_si128 120 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 121 return _mm_bsrli_si128(A, 5); 122 } 123 124 __m128 test_mm_castpd_ps(__m128d A) { 125 // CHECK-LABEL: test_mm_castpd_ps 126 // CHECK: bitcast <2 x double> %{{.*}} to <4 x float> 127 return _mm_castpd_ps(A); 128 } 129 130 __m128i test_mm_castpd_si128(__m128d A) { 131 // CHECK-LABEL: test_mm_castpd_si128 132 // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64> 133 return _mm_castpd_si128(A); 134 } 135 136 __m128d test_mm_castps_pd(__m128 A) { 137 // CHECK-LABEL: test_mm_castps_pd 138 // CHECK: bitcast <4 x float> %{{.*}} to <2 x double> 139 return _mm_castps_pd(A); 140 } 141 142 __m128i test_mm_castps_si128(__m128 A) { 143 // CHECK-LABEL: test_mm_castps_si128 144 // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> 145 return _mm_castps_si128(A); 146 } 147 148 __m128d test_mm_castsi128_pd(__m128i A) { 149 // CHECK-LABEL: test_mm_castsi128_pd 150 // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double> 151 return _mm_castsi128_pd(A); 152 } 153 154 __m128 test_mm_castsi128_ps(__m128i A) { 155 // CHECK-LABEL: test_mm_castsi128_ps 156 // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float> 157 return _mm_castsi128_ps(A); 158 } 159 160 void test_mm_clflush(void* A) { 161 // CHECK-LABEL: test_mm_clflush 162 // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}}) 163 _mm_clflush(A); 164 } 165 166 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) { 167 // CHECK-LABEL: test_mm_cmpeq_epi8 168 // CHECK: icmp eq <16 x i8> 169 return _mm_cmpeq_epi8(A, B); 170 } 171 172 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) { 173 // CHECK-LABEL: test_mm_cmpeq_epi16 174 // CHECK: icmp eq <8 x i16> 175 return _mm_cmpeq_epi16(A, B); 176 } 177 178 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) { 179 // CHECK-LABEL: test_mm_cmpeq_epi32 180 // CHECK: icmp eq <4 x i32> 181 return _mm_cmpeq_epi32(A, B); 182 } 183 184 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) { 185 // CHECK-LABEL: test_mm_cmpeq_pd 186 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double> 187 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 188 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 189 // CHECK-NEXT: ret <2 x double> [[BC]] 190 return _mm_cmpeq_pd(A, B); 191 } 192 193 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) { 194 // CHECK-LABEL: test_mm_cmpeq_sd 195 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0) 196 return _mm_cmpeq_sd(A, B); 197 } 198 199 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) { 200 // CHECK-LABEL: test_mm_cmpge_pd 201 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> 202 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 203 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 204 // CHECK-NEXT: ret <2 x double> [[BC]] 205 return _mm_cmpge_pd(A, B); 206 } 207 208 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) { 209 // CHECK-LABEL: test_mm_cmpge_sd 210 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 211 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 212 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 213 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 214 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 215 return _mm_cmpge_sd(A, B); 216 } 217 218 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) { 219 // CHECK-LABEL: test_mm_cmpgt_epi8 220 // CHECK: icmp sgt <16 x i8> 221 return _mm_cmpgt_epi8(A, B); 222 } 223 224 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) { 225 // CHECK-LABEL: test_mm_cmpgt_epi16 226 // CHECK: icmp sgt <8 x i16> 227 return _mm_cmpgt_epi16(A, B); 228 } 229 230 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) { 231 // CHECK-LABEL: test_mm_cmpgt_epi32 232 // CHECK: icmp sgt <4 x i32> 233 return _mm_cmpgt_epi32(A, B); 234 } 235 236 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) { 237 // CHECK-LABEL: test_mm_cmpgt_pd 238 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> 239 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 240 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 241 // CHECK-NEXT: ret <2 x double> [[BC]] 242 return _mm_cmpgt_pd(A, B); 243 } 244 245 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) { 246 // CHECK-LABEL: test_mm_cmpgt_sd 247 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 248 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 249 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 250 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 251 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 252 return _mm_cmpgt_sd(A, B); 253 } 254 255 __m128d test_mm_cmple_pd(__m128d A, __m128d B) { 256 // CHECK-LABEL: test_mm_cmple_pd 257 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double> 258 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 259 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 260 // CHECK-NEXT: ret <2 x double> [[BC]] 261 return _mm_cmple_pd(A, B); 262 } 263 264 __m128d test_mm_cmple_sd(__m128d A, __m128d B) { 265 // CHECK-LABEL: test_mm_cmple_sd 266 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2) 267 return _mm_cmple_sd(A, B); 268 } 269 270 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) { 271 // CHECK-LABEL: test_mm_cmplt_epi8 272 // CHECK: icmp sgt <16 x i8> 273 return _mm_cmplt_epi8(A, B); 274 } 275 276 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) { 277 // CHECK-LABEL: test_mm_cmplt_epi16 278 // CHECK: icmp sgt <8 x i16> 279 return _mm_cmplt_epi16(A, B); 280 } 281 282 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) { 283 // CHECK-LABEL: test_mm_cmplt_epi32 284 // CHECK: icmp sgt <4 x i32> 285 return _mm_cmplt_epi32(A, B); 286 } 287 288 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) { 289 // CHECK-LABEL: test_mm_cmplt_pd 290 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double> 291 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 292 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 293 // CHECK-NEXT: ret <2 x double> [[BC]] 294 return _mm_cmplt_pd(A, B); 295 } 296 297 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) { 298 // CHECK-LABEL: test_mm_cmplt_sd 299 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1) 300 return _mm_cmplt_sd(A, B); 301 } 302 303 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) { 304 // CHECK-LABEL: test_mm_cmpneq_pd 305 // CHECK: [[CMP:%.*]] = fcmp une <2 x double> 306 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 307 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 308 // CHECK-NEXT: ret <2 x double> [[BC]] 309 return _mm_cmpneq_pd(A, B); 310 } 311 312 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) { 313 // CHECK-LABEL: test_mm_cmpneq_sd 314 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4) 315 return _mm_cmpneq_sd(A, B); 316 } 317 318 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) { 319 // CHECK-LABEL: test_mm_cmpnge_pd 320 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> 321 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 322 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 323 // CHECK-NEXT: ret <2 x double> [[BC]] 324 return _mm_cmpnge_pd(A, B); 325 } 326 327 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) { 328 // CHECK-LABEL: test_mm_cmpnge_sd 329 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 330 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 331 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 332 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 333 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 334 return _mm_cmpnge_sd(A, B); 335 } 336 337 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) { 338 // CHECK-LABEL: test_mm_cmpngt_pd 339 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> 340 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 341 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 342 // CHECK-NEXT: ret <2 x double> [[BC]] 343 return _mm_cmpngt_pd(A, B); 344 } 345 346 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) { 347 // CHECK-LABEL: test_mm_cmpngt_sd 348 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 349 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 350 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 351 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 352 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 353 return _mm_cmpngt_sd(A, B); 354 } 355 356 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) { 357 // CHECK-LABEL: test_mm_cmpnle_pd 358 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double> 359 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 360 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 361 // CHECK-NEXT: ret <2 x double> [[BC]] 362 return _mm_cmpnle_pd(A, B); 363 } 364 365 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) { 366 // CHECK-LABEL: test_mm_cmpnle_sd 367 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6) 368 return _mm_cmpnle_sd(A, B); 369 } 370 371 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) { 372 // CHECK-LABEL: test_mm_cmpnlt_pd 373 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double> 374 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 375 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 376 // CHECK-NEXT: ret <2 x double> [[BC]] 377 return _mm_cmpnlt_pd(A, B); 378 } 379 380 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) { 381 // CHECK-LABEL: test_mm_cmpnlt_sd 382 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5) 383 return _mm_cmpnlt_sd(A, B); 384 } 385 386 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) { 387 // CHECK-LABEL: test_mm_cmpord_pd 388 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double> 389 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 390 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 391 // CHECK-NEXT: ret <2 x double> [[BC]] 392 return _mm_cmpord_pd(A, B); 393 } 394 395 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) { 396 // CHECK-LABEL: test_mm_cmpord_sd 397 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7) 398 return _mm_cmpord_sd(A, B); 399 } 400 401 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) { 402 // CHECK-LABEL: test_mm_cmpunord_pd 403 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double> 404 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> 405 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double> 406 // CHECK-NEXT: ret <2 x double> [[BC]] 407 return _mm_cmpunord_pd(A, B); 408 } 409 410 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) { 411 // CHECK-LABEL: test_mm_cmpunord_sd 412 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3) 413 return _mm_cmpunord_sd(A, B); 414 } 415 416 int test_mm_comieq_sd(__m128d A, __m128d B) { 417 // CHECK-LABEL: test_mm_comieq_sd 418 // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 419 return _mm_comieq_sd(A, B); 420 } 421 422 int test_mm_comige_sd(__m128d A, __m128d B) { 423 // CHECK-LABEL: test_mm_comige_sd 424 // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 425 return _mm_comige_sd(A, B); 426 } 427 428 int test_mm_comigt_sd(__m128d A, __m128d B) { 429 // CHECK-LABEL: test_mm_comigt_sd 430 // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 431 return _mm_comigt_sd(A, B); 432 } 433 434 int test_mm_comile_sd(__m128d A, __m128d B) { 435 // CHECK-LABEL: test_mm_comile_sd 436 // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 437 return _mm_comile_sd(A, B); 438 } 439 440 int test_mm_comilt_sd(__m128d A, __m128d B) { 441 // CHECK-LABEL: test_mm_comilt_sd 442 // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 443 return _mm_comilt_sd(A, B); 444 } 445 446 int test_mm_comineq_sd(__m128d A, __m128d B) { 447 // CHECK-LABEL: test_mm_comineq_sd 448 // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 449 return _mm_comineq_sd(A, B); 450 } 451 452 __m128d test_mm_cvtepi32_pd(__m128i A) { 453 // CHECK-LABEL: test_mm_cvtepi32_pd 454 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> 455 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> 456 return _mm_cvtepi32_pd(A); 457 } 458 459 __m128 test_mm_cvtepi32_ps(__m128i A) { 460 // CHECK-LABEL: test_mm_cvtepi32_ps 461 // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %{{.*}}) 462 return _mm_cvtepi32_ps(A); 463 } 464 465 __m128i test_mm_cvtpd_epi32(__m128d A) { 466 // CHECK-LABEL: test_mm_cvtpd_epi32 467 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}}) 468 return _mm_cvtpd_epi32(A); 469 } 470 471 __m128 test_mm_cvtpd_ps(__m128d A) { 472 // CHECK-LABEL: test_mm_cvtpd_ps 473 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}}) 474 return _mm_cvtpd_ps(A); 475 } 476 477 __m128i test_mm_cvtps_epi32(__m128 A) { 478 // CHECK-LABEL: test_mm_cvtps_epi32 479 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}}) 480 return _mm_cvtps_epi32(A); 481 } 482 483 __m128d test_mm_cvtps_pd(__m128 A) { 484 // CHECK-LABEL: test_mm_cvtps_pd 485 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1> 486 // CHECK: fpext <2 x float> %{{.*}} to <2 x double> 487 return _mm_cvtps_pd(A); 488 } 489 490 double test_mm_cvtsd_f64(__m128d A) { 491 // CHECK-LABEL: test_mm_cvtsd_f64 492 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 493 return _mm_cvtsd_f64(A); 494 } 495 496 int test_mm_cvtsd_si32(__m128d A) { 497 // CHECK-LABEL: test_mm_cvtsd_si32 498 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}}) 499 return _mm_cvtsd_si32(A); 500 } 501 502 long long test_mm_cvtsd_si64(__m128d A) { 503 // CHECK-LABEL: test_mm_cvtsd_si64 504 // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}}) 505 return _mm_cvtsd_si64(A); 506 } 507 508 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { 509 // CHECK-LABEL: test_mm_cvtsd_ss 510 // CHECK: fptrunc double %{{.*}} to float 511 return _mm_cvtsd_ss(A, B); 512 } 513 514 int test_mm_cvtsi128_si32(__m128i A) { 515 // CHECK-LABEL: test_mm_cvtsi128_si32 516 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0 517 return _mm_cvtsi128_si32(A); 518 } 519 520 long long test_mm_cvtsi128_si64(__m128i A) { 521 // CHECK-LABEL: test_mm_cvtsi128_si64 522 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0 523 return _mm_cvtsi128_si64(A); 524 } 525 526 __m128d test_mm_cvtsi32_sd(__m128d A, int B) { 527 // CHECK-LABEL: test_mm_cvtsi32_sd 528 // CHECK: sitofp i32 %{{.*}} to double 529 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 530 return _mm_cvtsi32_sd(A, B); 531 } 532 533 __m128i test_mm_cvtsi32_si128(int A) { 534 // CHECK-LABEL: test_mm_cvtsi32_si128 535 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 536 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1 537 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2 538 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3 539 return _mm_cvtsi32_si128(A); 540 } 541 542 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) { 543 // CHECK-LABEL: test_mm_cvtsi64_sd 544 // CHECK: sitofp i64 %{{.*}} to double 545 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 546 return _mm_cvtsi64_sd(A, B); 547 } 548 549 __m128i test_mm_cvtsi64_si128(long long A) { 550 // CHECK-LABEL: test_mm_cvtsi64_si128 551 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 552 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 553 return _mm_cvtsi64_si128(A); 554 } 555 556 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) { 557 // CHECK-LABEL: test_mm_cvtss_sd 558 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 559 // CHECK: fpext float %{{.*}} to double 560 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 561 return _mm_cvtss_sd(A, B); 562 } 563 564 __m128i test_mm_cvttpd_epi32(__m128d A) { 565 // CHECK-LABEL: test_mm_cvttpd_epi32 566 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}}) 567 return _mm_cvttpd_epi32(A); 568 } 569 570 __m128i test_mm_cvttps_epi32(__m128 A) { 571 // CHECK-LABEL: test_mm_cvttps_epi32 572 // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> 573 return _mm_cvttps_epi32(A); 574 } 575 576 int test_mm_cvttsd_si32(__m128d A) { 577 // CHECK-LABEL: test_mm_cvttsd_si32 578 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 579 // CHECK: fptosi double %{{.*}} to i32 580 return _mm_cvttsd_si32(A); 581 } 582 583 long long test_mm_cvttsd_si64(__m128d A) { 584 // CHECK-LABEL: test_mm_cvttsd_si64 585 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 586 // CHECK: fptosi double %{{.*}} to i64 587 return _mm_cvttsd_si64(A); 588 } 589 590 __m128d test_mm_div_pd(__m128d A, __m128d B) { 591 // CHECK-LABEL: test_mm_div_pd 592 // CHECK: fdiv <2 x double> 593 return _mm_div_pd(A, B); 594 } 595 596 __m128d test_mm_div_sd(__m128d A, __m128d B) { 597 // CHECK-LABEL: test_mm_div_sd 598 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 599 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 600 // CHECK: fdiv double 601 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 602 return _mm_div_sd(A, B); 603 } 604 605 // Lowering to pextrw requires optimization. 606 int test_mm_extract_epi16(__m128i A) { 607 // CHECK-LABEL: test_mm_extract_epi16 608 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 609 // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] 610 // CHECK: zext i16 %{{.*}} to i32 611 return _mm_extract_epi16(A, 9); 612 } 613 614 __m128i test_mm_insert_epi16(__m128i A, int B) { 615 // CHECK-LABEL: test_mm_insert_epi16 616 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 617 // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]] 618 return _mm_insert_epi16(A, B, 8); 619 } 620 621 void test_mm_lfence() { 622 // CHECK-LABEL: test_mm_lfence 623 // CHECK: call void @llvm.x86.sse2.lfence() 624 _mm_lfence(); 625 } 626 627 __m128d test_mm_load_pd(double const* A) { 628 // CHECK-LABEL: test_mm_load_pd 629 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16 630 return _mm_load_pd(A); 631 } 632 633 __m128d test_mm_load_pd1(double const* A) { 634 // CHECK-LABEL: test_mm_load_pd1 635 // CHECK: load double, double* %{{.*}}, align 8 636 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 637 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 638 return _mm_load_pd1(A); 639 } 640 641 __m128d test_mm_load_sd(double const* A) { 642 // CHECK-LABEL: test_mm_load_sd 643 // CHECK: load double, double* %{{.*}}, align 1{{$}} 644 return _mm_load_sd(A); 645 } 646 647 __m128i test_mm_load_si128(__m128i const* A) { 648 // CHECK-LABEL: test_mm_load_si128 649 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 650 return _mm_load_si128(A); 651 } 652 653 __m128d test_mm_load1_pd(double const* A) { 654 // CHECK-LABEL: test_mm_load1_pd 655 // CHECK: load double, double* %{{.*}}, align 8 656 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 657 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 658 return _mm_load1_pd(A); 659 } 660 661 __m128d test_mm_loadh_pd(__m128d x, void* y) { 662 // CHECK-LABEL: test_mm_loadh_pd 663 // CHECK: load double, double* %{{.*}}, align 1{{$}} 664 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 665 return _mm_loadh_pd(x, y); 666 } 667 668 __m128i test_mm_loadl_epi64(__m128i* y) { 669 // CHECK: test_mm_loadl_epi64 670 // CHECK: load i64, i64* {{.*}}, align 1{{$}} 671 // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0 672 // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1 673 return _mm_loadl_epi64(y); 674 } 675 676 __m128d test_mm_loadl_pd(__m128d x, void* y) { 677 // CHECK-LABEL: test_mm_loadl_pd 678 // CHECK: load double, double* %{{.*}}, align 1{{$}} 679 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 680 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 681 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 682 return _mm_loadl_pd(x, y); 683 } 684 685 __m128d test_mm_loadr_pd(double const* A) { 686 // CHECK-LABEL: test_mm_loadr_pd 687 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16 688 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0> 689 return _mm_loadr_pd(A); 690 } 691 692 __m128d test_mm_loadu_pd(double const* A) { 693 // CHECK-LABEL: test_mm_loadu_pd 694 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}} 695 return _mm_loadu_pd(A); 696 } 697 698 __m128i test_mm_loadu_si128(__m128i const* A) { 699 // CHECK-LABEL: test_mm_loadu_si128 700 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} 701 return _mm_loadu_si128(A); 702 } 703 704 __m128i test_mm_loadu_si64(void const* A) { 705 // CHECK-LABEL: test_mm_loadu_si64 706 // CHECK: load i64, i64* %{{.*}}, align 1{{$}} 707 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 708 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1 709 return _mm_loadu_si64(A); 710 } 711 712 __m128i test_mm_madd_epi16(__m128i A, __m128i B) { 713 // CHECK-LABEL: test_mm_madd_epi16 714 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 715 return _mm_madd_epi16(A, B); 716 } 717 718 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) { 719 // CHECK-LABEL: test_mm_maskmoveu_si128 720 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}}) 721 _mm_maskmoveu_si128(A, B, C); 722 } 723 724 __m128i test_mm_max_epi16(__m128i A, __m128i B) { 725 // CHECK-LABEL: test_mm_max_epi16 726 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] 727 // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] 728 return _mm_max_epi16(A, B); 729 } 730 731 __m128i test_mm_max_epu8(__m128i A, __m128i B) { 732 // CHECK-LABEL: test_mm_max_epu8 733 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] 734 // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] 735 return _mm_max_epu8(A, B); 736 } 737 738 __m128d test_mm_max_pd(__m128d A, __m128d B) { 739 // CHECK-LABEL: test_mm_max_pd 740 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 741 return _mm_max_pd(A, B); 742 } 743 744 __m128d test_mm_max_sd(__m128d A, __m128d B) { 745 // CHECK-LABEL: test_mm_max_sd 746 // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 747 return _mm_max_sd(A, B); 748 } 749 750 void test_mm_mfence() { 751 // CHECK-LABEL: test_mm_mfence 752 // CHECK: call void @llvm.x86.sse2.mfence() 753 _mm_mfence(); 754 } 755 756 __m128i test_mm_min_epi16(__m128i A, __m128i B) { 757 // CHECK-LABEL: test_mm_min_epi16 758 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] 759 // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] 760 return _mm_min_epi16(A, B); 761 } 762 763 __m128i test_mm_min_epu8(__m128i A, __m128i B) { 764 // CHECK-LABEL: test_mm_min_epu8 765 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] 766 // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] 767 return _mm_min_epu8(A, B); 768 } 769 770 __m128d test_mm_min_pd(__m128d A, __m128d B) { 771 // CHECK-LABEL: test_mm_min_pd 772 // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 773 return _mm_min_pd(A, B); 774 } 775 776 __m128d test_mm_min_sd(__m128d A, __m128d B) { 777 // CHECK-LABEL: test_mm_min_sd 778 // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 779 return _mm_min_sd(A, B); 780 } 781 782 __m128i test_mm_move_epi64(__m128i A) { 783 // CHECK-LABEL: test_mm_move_epi64 784 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2> 785 return _mm_move_epi64(A); 786 } 787 788 __m128d test_mm_move_sd(__m128d A, __m128d B) { 789 // CHECK-LABEL: test_mm_move_sd 790 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 791 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 792 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 793 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 794 return _mm_move_sd(A, B); 795 } 796 797 int test_mm_movemask_epi8(__m128i A) { 798 // CHECK-LABEL: test_mm_movemask_epi8 799 // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) 800 return _mm_movemask_epi8(A); 801 } 802 803 int test_mm_movemask_pd(__m128d A) { 804 // CHECK-LABEL: test_mm_movemask_pd 805 // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) 806 return _mm_movemask_pd(A); 807 } 808 809 __m128i test_mm_mul_epu32(__m128i A, __m128i B) { 810 // CHECK-LABEL: test_mm_mul_epu32 811 // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 812 return _mm_mul_epu32(A, B); 813 } 814 815 __m128d test_mm_mul_pd(__m128d A, __m128d B) { 816 // CHECK-LABEL: test_mm_mul_pd 817 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} 818 return _mm_mul_pd(A, B); 819 } 820 821 __m128d test_mm_mul_sd(__m128d A, __m128d B) { 822 // CHECK-LABEL: test_mm_mul_sd 823 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 824 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 825 // CHECK: fmul double 826 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 827 return _mm_mul_sd(A, B); 828 } 829 830 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) { 831 // CHECK-LABEL: test_mm_mulhi_epi16 832 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 833 return _mm_mulhi_epi16(A, B); 834 } 835 836 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) { 837 // CHECK-LABEL: test_mm_mulhi_epu16 838 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 839 return _mm_mulhi_epu16(A, B); 840 } 841 842 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) { 843 // CHECK-LABEL: test_mm_mullo_epi16 844 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}} 845 return _mm_mullo_epi16(A, B); 846 } 847 848 __m128d test_mm_or_pd(__m128d A, __m128d B) { 849 // CHECK-LABEL: test_mm_or_pd 850 // CHECK: or <4 x i32> %{{.*}}, %{{.*}} 851 return _mm_or_pd(A, B); 852 } 853 854 __m128i test_mm_or_si128(__m128i A, __m128i B) { 855 // CHECK-LABEL: test_mm_or_si128 856 // CHECK: or <2 x i64> %{{.*}}, %{{.*}} 857 return _mm_or_si128(A, B); 858 } 859 860 __m128i test_mm_packs_epi16(__m128i A, __m128i B) { 861 // CHECK-LABEL: test_mm_packs_epi16 862 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 863 return _mm_packs_epi16(A, B); 864 } 865 866 __m128i test_mm_packs_epi32(__m128i A, __m128i B) { 867 // CHECK-LABEL: test_mm_packs_epi32 868 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 869 return _mm_packs_epi32(A, B); 870 } 871 872 __m128i test_mm_packus_epi16(__m128i A, __m128i B) { 873 // CHECK-LABEL: test_mm_packus_epi16 874 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 875 return _mm_packus_epi16(A, B); 876 } 877 878 void test_mm_pause() { 879 // CHECK-LABEL: test_mm_pause 880 // CHECK: call void @llvm.x86.sse2.pause() 881 return _mm_pause(); 882 } 883 884 __m128i test_mm_sad_epu8(__m128i A, __m128i B) { 885 // CHECK-LABEL: test_mm_sad_epu8 886 // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 887 return _mm_sad_epu8(A, B); 888 } 889 890 __m128i test_mm_set_epi8(char A, char B, char C, char D, 891 char E, char F, char G, char H, 892 char I, char J, char K, char L, 893 char M, char N, char O, char P) { 894 // CHECK-LABEL: test_mm_set_epi8 895 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 896 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 897 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 898 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 899 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 900 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 901 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 902 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 903 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 904 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 905 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 906 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 907 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 908 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 909 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 910 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 911 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); 912 } 913 914 __m128i test_mm_set_epi16(short A, short B, short C, short D, 915 short E, short F, short G, short H) { 916 // CHECK-LABEL: test_mm_set_epi16 917 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 918 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 919 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 920 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 921 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 922 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 923 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 924 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 925 return _mm_set_epi16(A, B, C, D, E, F, G, H); 926 } 927 928 __m128i test_mm_set_epi32(int A, int B, int C, int D) { 929 // CHECK-LABEL: test_mm_set_epi32 930 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 931 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 932 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 933 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 934 return _mm_set_epi32(A, B, C, D); 935 } 936 937 __m128i test_mm_set_epi64(__m64 A, __m64 B) { 938 // CHECK-LABEL: test_mm_set_epi64 939 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 940 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 941 return _mm_set_epi64(A, B); 942 } 943 944 __m128i test_mm_set_epi64x(long long A, long long B) { 945 // CHECK-LABEL: test_mm_set_epi64x 946 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 947 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 948 return _mm_set_epi64x(A, B); 949 } 950 951 __m128d test_mm_set_pd(double A, double B) { 952 // CHECK-LABEL: test_mm_set_pd 953 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 954 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 955 return _mm_set_pd(A, B); 956 } 957 958 __m128d test_mm_set_sd(double A) { 959 // CHECK-LABEL: test_mm_set_sd 960 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 961 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 962 return _mm_set_sd(A); 963 } 964 965 __m128i test_mm_set1_epi8(char A) { 966 // CHECK-LABEL: test_mm_set1_epi8 967 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 968 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 969 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 970 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 971 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 972 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 973 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 974 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 975 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 976 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 977 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 978 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 979 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 980 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 981 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 982 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 983 return _mm_set1_epi8(A); 984 } 985 986 __m128i test_mm_set1_epi16(short A) { 987 // CHECK-LABEL: test_mm_set1_epi16 988 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 989 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 990 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 991 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 992 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 993 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 994 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 995 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 996 return _mm_set1_epi16(A); 997 } 998 999 __m128i test_mm_set1_epi32(int A) { 1000 // CHECK-LABEL: test_mm_set1_epi32 1001 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 1002 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 1003 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 1004 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 1005 return _mm_set1_epi32(A); 1006 } 1007 1008 __m128i test_mm_set1_epi64(__m64 A) { 1009 // CHECK-LABEL: test_mm_set1_epi64 1010 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 1011 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1012 return _mm_set1_epi64(A); 1013 } 1014 1015 __m128i test_mm_set1_epi64x(long long A) { 1016 // CHECK-LABEL: test_mm_set1_epi64x 1017 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 1018 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1019 return _mm_set1_epi64x(A); 1020 } 1021 1022 __m128d test_mm_set1_pd(double A) { 1023 // CHECK-LABEL: test_mm_set1_pd 1024 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 1025 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1026 return _mm_set1_pd(A); 1027 } 1028 1029 __m128i test_mm_setr_epi8(char A, char B, char C, char D, 1030 char E, char F, char G, char H, 1031 char I, char J, char K, char L, 1032 char M, char N, char O, char P) { 1033 // CHECK-LABEL: test_mm_setr_epi8 1034 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 1035 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 1036 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 1037 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 1038 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 1039 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 1040 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 1041 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 1042 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 1043 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 1044 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 1045 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 1046 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 1047 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 1048 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 1049 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 1050 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); 1051 } 1052 1053 __m128i test_mm_setr_epi16(short A, short B, short C, short D, 1054 short E, short F, short G, short H) { 1055 // CHECK-LABEL: test_mm_setr_epi16 1056 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0 1057 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1 1058 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2 1059 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3 1060 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4 1061 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5 1062 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6 1063 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 1064 return _mm_setr_epi16(A, B, C, D, E, F, G, H); 1065 } 1066 1067 __m128i test_mm_setr_epi32(int A, int B, int C, int D) { 1068 // CHECK-LABEL: test_mm_setr_epi32 1069 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 1070 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 1071 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 1072 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 1073 return _mm_setr_epi32(A, B, C, D); 1074 } 1075 1076 __m128i test_mm_setr_epi64(__m64 A, __m64 B) { 1077 // CHECK-LABEL: test_mm_setr_epi64 1078 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 1079 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 1080 return _mm_setr_epi64(A, B); 1081 } 1082 1083 __m128d test_mm_setr_pd(double A, double B) { 1084 // CHECK-LABEL: test_mm_setr_pd 1085 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 1086 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1087 return _mm_setr_pd(A, B); 1088 } 1089 1090 __m128d test_mm_setzero_pd() { 1091 // CHECK-LABEL: test_mm_setzero_pd 1092 // CHECK: store <2 x double> zeroinitializer 1093 return _mm_setzero_pd(); 1094 } 1095 1096 __m128i test_mm_setzero_si128() { 1097 // CHECK-LABEL: test_mm_setzero_si128 1098 // CHECK: store <2 x i64> zeroinitializer 1099 return _mm_setzero_si128(); 1100 } 1101 1102 __m128i test_mm_shuffle_epi32(__m128i A) { 1103 // CHECK-LABEL: test_mm_shuffle_epi32 1104 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer 1105 return _mm_shuffle_epi32(A, 0); 1106 } 1107 1108 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { 1109 // CHECK-LABEL: test_mm_shuffle_pd 1110 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2> 1111 return _mm_shuffle_pd(A, B, 1); 1112 } 1113 1114 __m128i test_mm_shufflehi_epi16(__m128i A) { 1115 // CHECK-LABEL: test_mm_shufflehi_epi16 1116 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 1117 return _mm_shufflehi_epi16(A, 0); 1118 } 1119 1120 __m128i test_mm_shufflelo_epi16(__m128i A) { 1121 // CHECK-LABEL: test_mm_shufflelo_epi16 1122 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 1123 return _mm_shufflelo_epi16(A, 0); 1124 } 1125 1126 __m128i test_mm_sll_epi16(__m128i A, __m128i B) { 1127 // CHECK-LABEL: test_mm_sll_epi16 1128 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1129 return _mm_sll_epi16(A, B); 1130 } 1131 1132 __m128i test_mm_sll_epi32(__m128i A, __m128i B) { 1133 // CHECK-LABEL: test_mm_sll_epi32 1134 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1135 return _mm_sll_epi32(A, B); 1136 } 1137 1138 __m128i test_mm_sll_epi64(__m128i A, __m128i B) { 1139 // CHECK-LABEL: test_mm_sll_epi64 1140 // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 1141 return _mm_sll_epi64(A, B); 1142 } 1143 1144 __m128i test_mm_slli_epi16(__m128i A) { 1145 // CHECK-LABEL: test_mm_slli_epi16 1146 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1147 return _mm_slli_epi16(A, 1); 1148 } 1149 1150 __m128i test_mm_slli_epi32(__m128i A) { 1151 // CHECK-LABEL: test_mm_slli_epi32 1152 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1153 return _mm_slli_epi32(A, 1); 1154 } 1155 1156 __m128i test_mm_slli_epi64(__m128i A) { 1157 // CHECK-LABEL: test_mm_slli_epi64 1158 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1159 return _mm_slli_epi64(A, 1); 1160 } 1161 1162 __m128i test_mm_slli_si128(__m128i A) { 1163 // CHECK-LABEL: test_mm_slli_si128 1164 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 1165 return _mm_slli_si128(A, 5); 1166 } 1167 1168 __m128i test_mm_slli_si128_2(__m128i A) { 1169 // CHECK-LABEL: test_mm_slli_si128_2 1170 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1171 return _mm_slli_si128(A, 17); 1172 } 1173 1174 __m128d test_mm_sqrt_pd(__m128d A) { 1175 // CHECK-LABEL: test_mm_sqrt_pd 1176 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}}) 1177 return _mm_sqrt_pd(A); 1178 } 1179 1180 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) { 1181 // CHECK-LABEL: test_mm_sqrt_sd 1182 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}}) 1183 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1184 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0 1185 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 1186 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 1187 return _mm_sqrt_sd(A, B); 1188 } 1189 1190 __m128i test_mm_sra_epi16(__m128i A, __m128i B) { 1191 // CHECK-LABEL: test_mm_sra_epi16 1192 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1193 return _mm_sra_epi16(A, B); 1194 } 1195 1196 __m128i test_mm_sra_epi32(__m128i A, __m128i B) { 1197 // CHECK-LABEL: test_mm_sra_epi32 1198 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1199 return _mm_sra_epi32(A, B); 1200 } 1201 1202 __m128i test_mm_srai_epi16(__m128i A) { 1203 // CHECK-LABEL: test_mm_srai_epi16 1204 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1205 return _mm_srai_epi16(A, 1); 1206 } 1207 1208 __m128i test_mm_srai_epi32(__m128i A) { 1209 // CHECK-LABEL: test_mm_srai_epi32 1210 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1211 return _mm_srai_epi32(A, 1); 1212 } 1213 1214 __m128i test_mm_srl_epi16(__m128i A, __m128i B) { 1215 // CHECK-LABEL: test_mm_srl_epi16 1216 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1217 return _mm_srl_epi16(A, B); 1218 } 1219 1220 __m128i test_mm_srl_epi32(__m128i A, __m128i B) { 1221 // CHECK-LABEL: test_mm_srl_epi32 1222 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 1223 return _mm_srl_epi32(A, B); 1224 } 1225 1226 __m128i test_mm_srl_epi64(__m128i A, __m128i B) { 1227 // CHECK-LABEL: test_mm_srl_epi64 1228 // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 1229 return _mm_srl_epi64(A, B); 1230 } 1231 1232 __m128i test_mm_srli_epi16(__m128i A) { 1233 // CHECK-LABEL: test_mm_srli_epi16 1234 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}}) 1235 return _mm_srli_epi16(A, 1); 1236 } 1237 1238 __m128i test_mm_srli_epi32(__m128i A) { 1239 // CHECK-LABEL: test_mm_srli_epi32 1240 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}}) 1241 return _mm_srli_epi32(A, 1); 1242 } 1243 1244 __m128i test_mm_srli_epi64(__m128i A) { 1245 // CHECK-LABEL: test_mm_srli_epi64 1246 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}}) 1247 return _mm_srli_epi64(A, 1); 1248 } 1249 1250 __m128i test_mm_srli_si128(__m128i A) { 1251 // CHECK-LABEL: test_mm_srli_si128 1252 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 1253 return _mm_srli_si128(A, 5); 1254 } 1255 1256 __m128i test_mm_srli_si128_2(__m128i A) { 1257 // CHECK-LABEL: test_mm_srli_si128_2 1258 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1259 return _mm_srli_si128(A, 17); 1260 } 1261 1262 void test_mm_store_pd(double* A, __m128d B) { 1263 // CHECK-LABEL: test_mm_store_pd 1264 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 1265 _mm_store_pd(A, B); 1266 } 1267 1268 void test_mm_store_pd1(double* x, __m128d y) { 1269 // CHECK-LABEL: test_mm_store_pd1 1270 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer 1271 // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16 1272 _mm_store_pd1(x, y); 1273 } 1274 1275 void test_mm_store_sd(double* A, __m128d B) { 1276 // CHECK-LABEL: test_mm_store_sd 1277 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1278 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}} 1279 _mm_store_sd(A, B); 1280 } 1281 1282 void test_mm_store_si128(__m128i* A, __m128i B) { 1283 // CHECK-LABEL: test_mm_store_si128 1284 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 1285 _mm_store_si128(A, B); 1286 } 1287 1288 void test_mm_store1_pd(double* x, __m128d y) { 1289 // CHECK-LABEL: test_mm_store1_pd 1290 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer 1291 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16 1292 _mm_store1_pd(x, y); 1293 } 1294 1295 void test_mm_storeh_pd(double* A, __m128d B) { 1296 // CHECK-LABEL: test_mm_storeh_pd 1297 // CHECK: extractelement <2 x double> %{{.*}}, i32 1 1298 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}} 1299 _mm_storeh_pd(A, B); 1300 } 1301 1302 void test_mm_storel_epi64(__m128i x, void* y) { 1303 // CHECK-LABEL: test_mm_storel_epi64 1304 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0 1305 // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}} 1306 _mm_storel_epi64(y, x); 1307 } 1308 1309 void test_mm_storel_pd(double* A, __m128d B) { 1310 // CHECK-LABEL: test_mm_storel_pd 1311 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1312 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}} 1313 _mm_storel_pd(A, B); 1314 } 1315 1316 void test_mm_storer_pd(__m128d A, double* B) { 1317 // CHECK-LABEL: test_mm_storer_pd 1318 // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0> 1319 // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}} 1320 _mm_storer_pd(B, A); 1321 } 1322 1323 void test_mm_storeu_pd(double* A, __m128d B) { 1324 // CHECK-LABEL: test_mm_storeu_pd 1325 // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}} 1326 // CHECK-NEXT: ret void 1327 _mm_storeu_pd(A, B); 1328 } 1329 1330 void test_mm_storeu_si128(__m128i* A, __m128i B) { 1331 // CHECK-LABEL: test_mm_storeu_si128 1332 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} 1333 // CHECK-NEXT: ret void 1334 _mm_storeu_si128(A, B); 1335 } 1336 1337 void test_mm_stream_pd(double *A, __m128d B) { 1338 // CHECK-LABEL: test_mm_stream_pd 1339 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal 1340 _mm_stream_pd(A, B); 1341 } 1342 1343 void test_mm_stream_si32(int *A, int B) { 1344 // CHECK-LABEL: test_mm_stream_si32 1345 // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal 1346 _mm_stream_si32(A, B); 1347 } 1348 1349 void test_mm_stream_si64(long long *A, long long B) { 1350 // CHECK-LABEL: test_mm_stream_si64 1351 // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal 1352 _mm_stream_si64(A, B); 1353 } 1354 1355 void test_mm_stream_si128(__m128i *A, __m128i B) { 1356 // CHECK-LABEL: test_mm_stream_si128 1357 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal 1358 _mm_stream_si128(A, B); 1359 } 1360 1361 __m128i test_mm_sub_epi8(__m128i A, __m128i B) { 1362 // CHECK-LABEL: test_mm_sub_epi8 1363 // CHECK: sub <16 x i8> 1364 return _mm_sub_epi8(A, B); 1365 } 1366 1367 __m128i test_mm_sub_epi16(__m128i A, __m128i B) { 1368 // CHECK-LABEL: test_mm_sub_epi16 1369 // CHECK: sub <8 x i16> 1370 return _mm_sub_epi16(A, B); 1371 } 1372 1373 __m128i test_mm_sub_epi32(__m128i A, __m128i B) { 1374 // CHECK-LABEL: test_mm_sub_epi32 1375 // CHECK: sub <4 x i32> 1376 return _mm_sub_epi32(A, B); 1377 } 1378 1379 __m128i test_mm_sub_epi64(__m128i A, __m128i B) { 1380 // CHECK-LABEL: test_mm_sub_epi64 1381 // CHECK: sub <2 x i64> 1382 return _mm_sub_epi64(A, B); 1383 } 1384 1385 __m128d test_mm_sub_pd(__m128d A, __m128d B) { 1386 // CHECK-LABEL: test_mm_sub_pd 1387 // CHECK: fsub <2 x double> 1388 return _mm_sub_pd(A, B); 1389 } 1390 1391 __m128d test_mm_sub_sd(__m128d A, __m128d B) { 1392 // CHECK-LABEL: test_mm_sub_sd 1393 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1394 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 1395 // CHECK: fsub double 1396 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 1397 return _mm_sub_sd(A, B); 1398 } 1399 1400 __m128i test_mm_subs_epi8(__m128i A, __m128i B) { 1401 // CHECK-LABEL: test_mm_subs_epi8 1402 // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 1403 return _mm_subs_epi8(A, B); 1404 } 1405 1406 __m128i test_mm_subs_epi16(__m128i A, __m128i B) { 1407 // CHECK-LABEL: test_mm_subs_epi16 1408 // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1409 return _mm_subs_epi16(A, B); 1410 } 1411 1412 __m128i test_mm_subs_epu8(__m128i A, __m128i B) { 1413 // CHECK-LABEL: test_mm_subs_epu8 1414 // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 1415 return _mm_subs_epu8(A, B); 1416 } 1417 1418 __m128i test_mm_subs_epu16(__m128i A, __m128i B) { 1419 // CHECK-LABEL: test_mm_subs_epu16 1420 // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 1421 return _mm_subs_epu16(A, B); 1422 } 1423 1424 int test_mm_ucomieq_sd(__m128d A, __m128d B) { 1425 // CHECK-LABEL: test_mm_ucomieq_sd 1426 // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1427 return _mm_ucomieq_sd(A, B); 1428 } 1429 1430 int test_mm_ucomige_sd(__m128d A, __m128d B) { 1431 // CHECK-LABEL: test_mm_ucomige_sd 1432 // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1433 return _mm_ucomige_sd(A, B); 1434 } 1435 1436 int test_mm_ucomigt_sd(__m128d A, __m128d B) { 1437 // CHECK-LABEL: test_mm_ucomigt_sd 1438 // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1439 return _mm_ucomigt_sd(A, B); 1440 } 1441 1442 int test_mm_ucomile_sd(__m128d A, __m128d B) { 1443 // CHECK-LABEL: test_mm_ucomile_sd 1444 // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1445 return _mm_ucomile_sd(A, B); 1446 } 1447 1448 int test_mm_ucomilt_sd(__m128d A, __m128d B) { 1449 // CHECK-LABEL: test_mm_ucomilt_sd 1450 // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1451 return _mm_ucomilt_sd(A, B); 1452 } 1453 1454 int test_mm_ucomineq_sd(__m128d A, __m128d B) { 1455 // CHECK-LABEL: test_mm_ucomineq_sd 1456 // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) 1457 return _mm_ucomineq_sd(A, B); 1458 } 1459 1460 __m128d test_mm_undefined_pd() { 1461 // CHECK-LABEL: @test_mm_undefined_pd 1462 // CHECK: ret <2 x double> undef 1463 return _mm_undefined_pd(); 1464 } 1465 1466 __m128i test_mm_undefined_si128() { 1467 // CHECK-LABEL: @test_mm_undefined_si128 1468 // CHECK: ret <2 x i64> undef 1469 return _mm_undefined_si128(); 1470 } 1471 1472 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) { 1473 // CHECK-LABEL: test_mm_unpackhi_epi8 1474 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1475 return _mm_unpackhi_epi8(A, B); 1476 } 1477 1478 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) { 1479 // CHECK-LABEL: test_mm_unpackhi_epi16 1480 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1481 return _mm_unpackhi_epi16(A, B); 1482 } 1483 1484 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) { 1485 // CHECK-LABEL: test_mm_unpackhi_epi32 1486 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1487 return _mm_unpackhi_epi32(A, B); 1488 } 1489 1490 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) { 1491 // CHECK-LABEL: test_mm_unpackhi_epi64 1492 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3> 1493 return _mm_unpackhi_epi64(A, B); 1494 } 1495 1496 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) { 1497 // CHECK-LABEL: test_mm_unpackhi_pd 1498 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3> 1499 return _mm_unpackhi_pd(A, B); 1500 } 1501 1502 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) { 1503 // CHECK-LABEL: test_mm_unpacklo_epi8 1504 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1505 return _mm_unpacklo_epi8(A, B); 1506 } 1507 1508 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) { 1509 // CHECK-LABEL: test_mm_unpacklo_epi16 1510 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1511 return _mm_unpacklo_epi16(A, B); 1512 } 1513 1514 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) { 1515 // CHECK-LABEL: test_mm_unpacklo_epi32 1516 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1517 return _mm_unpacklo_epi32(A, B); 1518 } 1519 1520 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) { 1521 // CHECK-LABEL: test_mm_unpacklo_epi64 1522 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2> 1523 return _mm_unpacklo_epi64(A, B); 1524 } 1525 1526 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) { 1527 // CHECK-LABEL: test_mm_unpacklo_pd 1528 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2> 1529 return _mm_unpacklo_pd(A, B); 1530 } 1531 1532 __m128d test_mm_xor_pd(__m128d A, __m128d B) { 1533 // CHECK-LABEL: test_mm_xor_pd 1534 // CHECK: xor <4 x i32> %{{.*}}, %{{.*}} 1535 return _mm_xor_pd(A, B); 1536 } 1537 1538 __m128i test_mm_xor_si128(__m128i A, __m128i B) { 1539 // CHECK-LABEL: test_mm_xor_si128 1540 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} 1541 return _mm_xor_si128(A, B); 1542 } 1543