Lines Matching full:xmm0
57 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
62 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
76 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
81 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
95 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
100 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
132 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
137 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
149 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
154 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
166 ; SSE-NEXT: pand %xmm1, %xmm0
167 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
172 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
173 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
184 ; SSE-NEXT: por %xmm1, %xmm0
185 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
190 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
191 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
202 ; SSE-NEXT: pxor %xmm1, %xmm0
203 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
208 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
209 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
220 ; SSE-NEXT: pand %xmm1, %xmm0
221 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
226 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
227 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
238 ; SSE-NEXT: por %xmm1, %xmm0
239 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
244 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
245 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
256 ; SSE-NEXT: pxor %xmm1, %xmm0
257 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
262 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
263 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
278 ; SSE2-NEXT: pand %xmm1, %xmm0
279 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
281 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
286 ; SSSE3-NEXT: pand %xmm1, %xmm0
287 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
289 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
294 ; SSE41-NEXT: pand %xmm1, %xmm0
295 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
300 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
301 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
306 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
307 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
318 ; SSE2-NEXT: por %xmm1, %xmm0
319 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
321 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
326 ; SSSE3-NEXT: por %xmm1, %xmm0
327 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
329 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334 ; SSE41-NEXT: por %xmm1, %xmm0
335 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
340 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
341 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
346 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
347 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
358 ; SSE2-NEXT: xorps %xmm1, %xmm0
359 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
364 ; SSSE3-NEXT: xorps %xmm1, %xmm0
365 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
370 ; SSE41-NEXT: pxor %xmm1, %xmm0
372 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
377 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
379 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
384 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
386 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
397 ; SSE2-NEXT: pand %xmm1, %xmm0
398 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
399 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
400 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
405 ; SSSE3-NEXT: pand %xmm1, %xmm0
406 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
407 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
408 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
413 ; SSE41-NEXT: pand %xmm1, %xmm0
414 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
419 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
420 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
425 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
426 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
437 ; SSE2-NEXT: por %xmm1, %xmm0
438 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
439 xmm0 = xmm2[0,2,2,3]
440 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
445 ; SSSE3-NEXT: por %xmm1, %xmm0
446 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
447 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
448 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
453 ; SSE41-NEXT: por %xmm1, %xmm0
454 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
459 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
460 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
465 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
466 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
477 ; SSE2-NEXT: xorps %xmm1, %xmm0
478 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
483 ; SSSE3-NEXT: xorps %xmm1, %xmm0
484 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
489 ; SSE41-NEXT: pxor %xmm1, %xmm0
491 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
496 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
498 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
503 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
505 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
516 ; SSE2-NEXT: pand %xmm1, %xmm0
517 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
519 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
524 ; SSSE3-NEXT: pand %xmm1, %xmm0
525 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
527 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
532 ; SSE41-NEXT: pand %xmm1, %xmm0
533 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
534 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
539 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
540 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
541 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
546 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
547 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
548 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
559 ; SSE2-NEXT: por %xmm1, %xmm0
560 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
562 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
567 ; SSSE3-NEXT: por %xmm1, %xmm0
568 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
570 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
575 ; SSE41-NEXT: por %xmm1, %xmm0
576 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
577 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
582 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
583 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
584 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
589 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
590 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
591 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
602 ; SSE2-NEXT: pxor %xmm1, %xmm0
603 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
605 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
610 ; SSSE3-NEXT: pxor %xmm1, %xmm0
611 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
613 ; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
618 ; SSE41-NEXT: pxor %xmm1, %xmm0
619 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
620 ; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
625 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
626 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
627 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
638 ; SSE2-NEXT: pand %xmm1, %xmm0
639 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
640 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
641 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
646 ; SSSE3-NEXT: pand %xmm1, %xmm0
647 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
648 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
649 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
654 ; SSE41-NEXT: pand %xmm1, %xmm0
655 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
656 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
661 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
662 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
663 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
668 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
669 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
670 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
681 ; SSE2-NEXT: por %xmm1, %xmm0
682 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
683 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
684 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
689 ; SSSE3-NEXT: por %xmm1, %xmm0
690 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
691 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
692 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
697 ; SSE41-NEXT: por %xmm1, %xmm0
698 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
699 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
704 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
705 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
706 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
711 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
712 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
713 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
724 ; SSE2-NEXT: pxor %xmm1, %xmm0
725 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
726 ; SSE2-NEXT: pxor %xmm0, %xmm0
727 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
732 ; SSSE3-NEXT: pxor %xmm1, %xmm0
733 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
734 ; SSSE3-NEXT: pxor %xmm0, %xmm0
735 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
740 ; SSE41-NEXT: pxor %xmm1, %xmm0
741 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
742 ; SSE41-NEXT: pxor %xmm0, %xmm0
743 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
748 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
749 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
751 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
756 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
757 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
759 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
770 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
775 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
785 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
790 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
800 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
805 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
815 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
820 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
825 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
835 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
840 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
850 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
855 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
865 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
870 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
880 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
885 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
895 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
900 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
910 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
915 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
925 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
930 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
940 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
945 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
950 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
971 ; SSE-NEXT: movaps %xmm1, %xmm0
976 ; AVX-NEXT: vmovaps %xmm1, %xmm0
993 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
994 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
995 ; SSE2-NEXT: movaps %xmm1, %xmm0
1000 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1001 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
1002 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1008 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1009 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1015 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1016 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1022 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1023 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1034 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
1035 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1041 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
1042 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1047 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1048 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1053 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1054 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1059 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1060 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1070 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
1071 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
1076 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
1077 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
1082 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1083 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1088 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1089 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1094 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1095 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1105 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
1110 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
1120 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1121 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
1126 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1127 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
1132 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1133 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1138 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1139 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1144 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1145 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1155 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
1157 ; SSE2-NEXT: movaps %xmm1, %xmm0
1162 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
1164 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1169 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1170 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1175 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1176 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1181 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1182 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1192 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1193 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
1198 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1199 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
1204 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1205 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1210 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1211 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1216 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1217 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1231 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1236 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1246 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1251 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1261 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
1266 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
1276 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1281 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1286 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1296 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1301 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1311 ; SSE-NEXT: pshufd {{.*#+}} xmm0xmm0[0,1,0,1]
1316 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1321 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1331 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1336 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1346 ; SSE-NEXT: movaps %xmm1, %xmm0
1351 ; AVX-NEXT: vmovaps %xmm1, %xmm0
1361 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1362 ; SSE2-NEXT: movaps %xmm1, %xmm0
1367 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1368 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1373 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1378 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1388 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1393 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1403 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1404 ; SSE-NEXT: movapd %xmm1, %xmm0
1409 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1419 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1420 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1425 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1426 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1431 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1436 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1446 ; SSE-NEXT: movaps %xmm1, %xmm0
1451 ; AVX-NEXT: vmovaps %xmm1, %xmm0
1461 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1462 ; SSE2-NEXT: movaps %xmm1, %xmm0
1467 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1468 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1473 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1478 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1483 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1493 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1498 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1508 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1509 ; SSE-NEXT: movdqa %xmm1, %xmm0
1514 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1524 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1525 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1530 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1531 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1536 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1541 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1546 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1565 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1566 ; SSE2-NEXT: movaps %xmm1, %xmm0
1571 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1572 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1577 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1582 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1592 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1597 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1607 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1612 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1622 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1623 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1628 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1629 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1634 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1639 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1658 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1659 ; SSE2-NEXT: movaps %xmm1, %xmm0
1664 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1665 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1670 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1675 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1680 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1690 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1695 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1705 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1710 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1720 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1721 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1726 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1727 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1732 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1737 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1742 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1752 ; SSE-NEXT: movdqa %xmm0, %xmm2
1754 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1761 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1762 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1770 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1771 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1784 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1785 ; SSE-NEXT: movhpd (%rsi), %xmm0
1790 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1791 xmm0, %xmm0
1807 ; SSE-NEXT: movaps %xmm1, %xmm0
1812 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0]
1823 ; SSE2-NEXT: movaps %xmm1, %xmm0
1828 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
1833 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
1838 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
1848 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1849 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
1854 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1855 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
1860 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1861 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
1866 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1867 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
1878 ; SSE-NEXT: movaps %xmm1, %xmm0
1883 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3]
1897 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1898 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1899 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1900 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1901 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1902 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1908 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1909 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1910 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1911 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1912 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1913 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1919 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1920 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1925 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1927 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1932 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1934 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1946 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1947 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1948 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1950 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1951 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1952 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1957 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1958 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1959 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1961 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1962 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1963 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1968 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1970 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1975 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1977 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1990 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1991 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1992 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1993 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1994 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1995 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2001 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2002 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2003 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2004 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2005 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2006 ; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2012 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2013 ; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2018 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2020 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2032 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2033 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2034 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2036 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2037 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2038 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
2039 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
2044 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2045 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2046 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2048 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2049 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2050 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
2051 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
2057 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2058 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
2063 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2065 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
2070 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2072 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
2114 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2119 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2124 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2129 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2139 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
2141 ; SSE2-NEXT: movaps %xmm1, %xmm0
2146 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
2148 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2153 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2158 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2168 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2169 ; SSE2-NEXT: movaps %xmm1, %xmm0
2174 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2175 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2180 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2185 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2196 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2197 ; SSE-NEXT: movdqa %xmm1, %xmm0
2202 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2212 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2213 ; SSE-NEXT: movdqa %xmm1, %xmm0
2218 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2228 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2229 ; SSE-NEXT: movdqa %xmm1, %xmm0
2234 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2248 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2253 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2258 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2263 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2273 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2278 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2288 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2293 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2303 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2304 ; SSE-NEXT: movapd %xmm1, %xmm0
2309 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2319 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2320 ; SSE2-NEXT: movapd %xmm1, %xmm0
2325 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2326 ; SSSE3-NEXT: movapd %xmm1, %xmm0
2331 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2336 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2359 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2364 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2369 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2374 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2384 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2389 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2394 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2399 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2409 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2414 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
2433 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2438 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2443 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2448 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2458 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2463 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2473 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2478 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2488 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2489 ; SSE-NEXT: movapd %xmm1, %xmm0
2494 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2504 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2505 ; SSE2-NEXT: movapd %xmm1, %xmm0
2510 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2511 ; SSSE3-NEXT: movapd %xmm1, %xmm0
2516 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2521 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2550 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2555 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2560 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2565 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2575 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2580 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2585 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2590 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2600 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2605 ; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
2630 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0]
2631 ; SSE-NEXT: movdqa %xmm0, %xmm1
2636 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2637 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2638 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
2639 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2657 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0]
2663 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2664 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2665 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2684 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
2685 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
2686 ; SSE2-NEXT: movaps %xmm1, %xmm0
2691 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
2692 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
2693 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2698 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
2703 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
2714 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
2715 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
2716 ; SSE2-NEXT: movaps %xmm1, %xmm0
2721 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
2722 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
2723 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2728 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
2733 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
2744 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2745 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2750 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2751 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2756 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
2761 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
2772 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
2773 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
2778 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
2779 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
2784 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
2789 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
2800 ; SSE-NEXT: movaps %xmm0, %xmm1
2802 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
2803 ; SSE-NEXT: addps %xmm0, %xmm1
2804 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2809 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
2810 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
2811 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1
2812 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2825 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2826 ; SSE2-NEXT: movaps %xmm0, %xmm2
2828 ; SSE2-NEXT: addps %xmm0, %xmm2
2829 ; SSE2-NEXT: movaps %xmm2, %xmm0
2834 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2835 ; SSSE3-NEXT: movaps %xmm0, %xmm2
2837 ; SSSE3-NEXT: addps %xmm0, %xmm2
2838 ; SSSE3-NEXT: movaps %xmm2, %xmm0
2843 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2844 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
2845 ; SSE41-NEXT: addps %xmm1, %xmm0
2850 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2851 ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
2852 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
2864 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
2865 ; SSE2-NEXT: movapd %xmm2, %xmm0
2866 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
2873 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
2874 ; SSSE3-NEXT: movapd %xmm2, %xmm0
2875 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
2882 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
2883 ; SSE41-NEXT: movapd %xmm0, %xmm1
2885 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2]
2886 ; SSE41-NEXT: movaps %xmm1, %xmm0