Home | History | Annotate | Download | only in X86

Lines Matching full:next

11 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
12 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX1-NEXT: retq
17 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
18 ; AVX2-NEXT: retq
22 ; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
23 ; AVX512VL-NEXT: retq
31 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
32 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
33 ; AVX1-NEXT: retq
37 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
38 ; AVX2-NEXT: retq
42 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
43 ; AVX512VL-NEXT: retq
51 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
52 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
53 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
54 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
55 ; AVX1-NEXT: retq
59 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
60 ; AVX2-NEXT: retq
64 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
65 ; AVX512VL-NEXT: retq
73 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
74 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
75 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
76 ; AVX1-NEXT: retq
80 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
81 ; AVX2-NEXT: retq
85 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
86 ; AVX512VL-NEXT: retq
94 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
95 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
96 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
97 ; AVX1-NEXT: retq
101 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
102 ; AVX2-NEXT: retq
106 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
107 ; AVX512VL-NEXT: retq
115 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
116 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
117 ; AVX1-NEXT: retq
121 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
122 ; AVX2-NEXT: retq
126 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
127 ; AVX512VL-NEXT: retq
135 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
136 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
137 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
138 ; AVX1-NEXT: retq
142 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
143 ; AVX2-NEXT: retq
147 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
148 ; AVX512VL-NEXT: retq
156 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
157 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
158 ; AVX1-NEXT: retq
162 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
163 ; AVX2-NEXT: retq
167 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
168 ; AVX512VL-NEXT: retq
176 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
177 ; ALL-NEXT: retq
186 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
187 ; ALL-NEXT: retq
195 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
196 ; ALL-NEXT: retq
205 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
206 ; ALL-NEXT: retq
214 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
215 ; ALL-NEXT: retq
223 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
224 ; ALL-NEXT: retq
232 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
233 ; ALL-NEXT: retq
241 ; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
242 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
243 ; ALL-NEXT: retq
251 ; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
252 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
253 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
254 ; ALL-NEXT: retq
262 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
263 ; ALL-NEXT: retq
271 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
272 ; ALL-NEXT: retq
280 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
281 ; ALL-NEXT: retq
289 ; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
290 ; ALL-NEXT: retq
298 ; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
299 ; ALL-NEXT: retq
307 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
308 ; ALL-NEXT: retq
316 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
317 ; ALL-NEXT: retq
325 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
326 ; AVX1-NEXT: retq
330 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
331 ; AVX2-NEXT: retq
335 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
336 ; AVX512VL-NEXT: retq
344 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
345 ; AVX1-NEXT: retq
349 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
350 ; AVX2-NEXT: retq
354 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
355 ; AVX512VL-NEXT: retq
363 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
364 ; ALL-NEXT: retq
372 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
373 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
374 ; AVX1-NEXT: retq
378 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
379 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
380 ; AVX2-NEXT: retq
384 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
385 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
386 ; AVX512VL-NEXT: retq
394 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
395 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
396 ; AVX1-NEXT: retq
400 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
401 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
402 ; AVX2-NEXT: retq
406 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
407 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
408 ; AVX512VL-NEXT: retq
416 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
417 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
418 ; AVX1-NEXT: retq
422 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
423 ; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
424 ; AVX2-NEXT: retq
428 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
429 ; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
430 ; AVX512VL-NEXT: retq
438 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
439 ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
440 ; ALL-NEXT: retq
448 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
449 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
450 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
451 ; AVX1-NEXT: retq
455 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
456 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
457 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
458 ; AVX2-NEXT: retq
462 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
463 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
464 ; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
465 ; AVX512VL-NEXT: retq
473 ; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
474 ; ALL-NEXT: retq
482 ; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
483 ; ALL-NEXT: retq
491 ; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
492 ; ALL-NEXT: retq
500 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
501 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
502 ; AVX1-NEXT: retq
506 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
507 ; AVX2-NEXT: retq
511 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
512 ; AVX512VL-NEXT: retq
520 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
521 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
522 ; AVX1-NEXT: retq
526 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
527 ; AVX2-NEXT: retq
531 ; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
532 ; AVX512VL-NEXT: retq
540 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
541 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
542 ; AVX1-NEXT: retq
546 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
547 ; AVX2-NEXT: retq
551 ; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
552 ; AVX512VL-NEXT: retq
560 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
561 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
562 ; AVX1-NEXT: retq
566 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
567 ; AVX2-NEXT: retq
571 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
572 ; AVX512VL-NEXT: retq
580 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
581 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
582 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
583 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
584 ; AVX1-NEXT: retq
588 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
589 ; AVX2-NEXT: retq
593 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
594 ; AVX512VL-NEXT: retq
602 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
603 ; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
604 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
605 ; AVX1-NEXT: retq
609 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
610 ; AVX2-NEXT: retq
614 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
615 ; AVX512VL-NEXT: retq
623 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
624 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
625 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
626 ; AVX1-NEXT: retq
630 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
631 ; AVX2-NEXT: retq
635 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
636 ; AVX512VL-NEXT: retq
644 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
645 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
646 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
647 ; AVX1-NEXT: retq
651 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
652 ; AVX2-NEXT: retq
656 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
657 ; AVX512VL-NEXT: retq
665 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
666 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
667 ; AVX1-NEXT: retq
671 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
672 ; AVX2-NEXT: retq
676 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
677 ; AVX512VL-NEXT: retq
685 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
686 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
687 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
688 ; AVX1-NEXT: retq
692 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
693 ; AVX2-NEXT: retq
697 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
698 ; AVX512VL-NEXT: retq
706 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
707 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
708 ; AVX1-NEXT: retq
712 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
713 ; AVX2-NEXT: retq
717 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
718 ; AVX512VL-NEXT: retq
726 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
727 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
728 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
729 ; AVX1-NEXT: retq
733 ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
734 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
735 ; AVX2-NEXT: retq
739 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
740 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
741 ; AVX512VL-NEXT: retq
749 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
750 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
751 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
752 ; AVX1-NEXT: retq
756 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
757 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
758 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
759 ; AVX2-NEXT: retq
763 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
764 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
765 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
766 ; AVX512VL-NEXT: retq
774 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
775 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
776 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
777 ; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
778 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
779 ; AVX1-NEXT: retq
783 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
784 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
785 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
786 ; AVX2-NEXT: retq
790 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
791 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
792 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
793 ; AVX512VL-NEXT: retq
801 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
802 ; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
803 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
804 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
805 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
806 ; AVX1-NEXT: retq
810 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
811 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
812 ; AVX2-NEXT: retq
816 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
817 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
818 ; AVX512VL-NEXT: retq
826 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
827 ; AVX1-NEXT: retq
831 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
832 ; AVX2-NEXT: retq
836 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
837 ; AVX512VL-NEXT: retq
845 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
846 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
847 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
848 ; AVX1-NEXT: retq
852 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
853 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
854 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
855 ; AVX2-NEXT: retq
859 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
860 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
861 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
862 ; AVX512VL-NEXT: retq
870 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
871 ; AVX1-NEXT: retq
875 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
876 ; AVX2-NEXT: retq
880 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
881 ; AVX512VL-NEXT: retq
889 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
890 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
891 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
892 ; AVX1-NEXT: retq
896 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
897 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
898 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
899 ; AVX2-NEXT: retq
903 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
904 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
905 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
906 ; AVX512VL-NEXT: retq
914 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
915 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
916 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
917 ; AVX1-NEXT: retq
921 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
922 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
923 ; AVX2-NEXT: retq
927 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
928 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
929 ; AVX512VL-NEXT: retq
937 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
938 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
939 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
940 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
941 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
942 ; AVX1-NEXT: retq
946 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
947 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
948 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
949 ; AVX2-NEXT: retq
953 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
954 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
955 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
956 ; AVX512VL-NEXT: retq
964 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
965 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
966 ; AVX1-NEXT: retq
970 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
971 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
972 ; AVX2-NEXT: retq
976 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
977 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
978 ; AVX512VL-NEXT: retq
986 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
987 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
988 ; AVX1-NEXT: retq
992 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
993 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
994 ; AVX2-NEXT: retq
998 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
999 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1000 ; AVX512VL-NEXT: retq
1008 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1009 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1010 ; AVX1-NEXT: retq
1014 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1015 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1016 ; AVX2-NEXT: retq
1020 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1021 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1022 ; AVX512VL-NEXT: retq
1030 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1031 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1032 ; AVX1-NEXT: retq
1036 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1037 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1038 ; AVX2-NEXT: retq
1042 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1043 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
1044 ; AVX512VL-NEXT: retq
1052 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
1053 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1054 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1055 ; AVX1-NEXT: retq
1059 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
1060 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1061 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1062 ; AVX2-NEXT: retq
1066 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
1067 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
1068 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
1069 ; AVX512VL-NEXT: retq
1077 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1078 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1079 ; AVX1-NEXT: retq
1083 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1084 ; AVX2-NEXT: retq
1088 ; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
1089 ; AVX512VL-NEXT: retq
1097 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1098 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
1099 ; AVX1-NEXT: retq
1103 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1104 ; AVX2-NEXT: retq
1108 ; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
1109 ; AVX512VL-NEXT: retq
1117 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1118 ; AVX1-NEXT: retq
1122 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1123 ; AVX2-NEXT: retq
1127 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
1128 ; AVX512VL-NEXT: retq
1136 ; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1137 ; ALL-NEXT: retq
1145 ; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1146 ; ALL-NEXT: retq
1154 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1155 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1156 ; AVX1-NEXT: retq
1160 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
1161 ; AVX2-NEXT: retq
1165 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
1166 ; AVX512VL-NEXT: retq
1174 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
1175 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1176 ; AVX1-NEXT: retq
1180 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
1181 ; AVX2-NEXT: retq
1185 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
1186 ; AVX512VL-NEXT: retq
1205 ; ALL-NEXT: vmovq %rdi, %xmm0
1206 ; ALL-NEXT: retq
1215 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1216 ; ALL-NEXT: retq
1226 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1227 ; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1228 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1229 ; AVX1-NEXT: retq
1233 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1234 ; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1235 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1236 ; AVX2-NEXT: retq
1240 ; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1241 ; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1242 ; AVX512VL-NEXT: retq
1251 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1252 ; ALL-NEXT: retq
1262 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1263 ; ALL-NEXT: retq
1273 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
1274 ; AVX1-NEXT: retq
1278 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1279 ; AVX2-NEXT: retq
1283 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1284 ; AVX512VL-NEXT: retq
1294 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1295 ; ALL-NEXT: retq
1305 ; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1306 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1307 ; AVX1-NEXT: retq
1311 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
1312 ; AVX2-NEXT: retq
1316 ; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
1317 ; AVX512VL-NEXT: retq
1325 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
1326 ; AVX1-NEXT: retq
1330 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1331 ; AVX2-NEXT: retq
1335 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1336 ; AVX512VL-NEXT: retq
1345 ; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
1346 ; ALL-NEXT: retq
1355 ; AVX1-NEXT: vmovaps (%rdi), %xmm0
1356 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1357 ; AVX1-NEXT: retq
1361 ; AVX2-NEXT: vmovaps (%rdi), %xmm0
1362 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1363 ; AVX2-NEXT: retq
1367 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
1368 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
1369 ; AVX512VL-NEXT: retq
1378 ; AVX1-NEXT: vmovaps (%rdi), %xmm0
1379 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1380 ; AVX1-NEXT: retq
1384 ; AVX2-NEXT: vmovaps (%rdi), %xmm0
1385 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1386 ; AVX2-NEXT: retq
1390 ; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
1391 ; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
1392 ; AVX512VL-NEXT: retq
1401 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1402 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1403 ; AVX1-NEXT: retq
1407 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
1408 ; AVX2-NEXT: retq
1412 ; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
1413 ; AVX512VL-NEXT: retq
1423 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1424 ; AVX1-NEXT: retq
1428 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1429 ; AVX2-NEXT: retq
1433 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1434 ; AVX512VL-NEXT: retq
1447 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1448 ; AVX1-NEXT: retq
1452 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1453 ; AVX2-NEXT: retq
1457 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1458 ; AVX512VL-NEXT: retq
1468 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1469 ; AVX1-NEXT: retq
1473 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1474 ; AVX2-NEXT: retq
1478 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
1479 ; AVX512VL-NEXT: retq
1492 ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
1493 ; AVX1-NEXT: retq
1497 ; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
1498 ; AVX2-NEXT: retq
1502 ; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
1503 ; AVX512VL-NEXT: retq