Home | History | Annotate | Download | only in X86

Lines Matching full:next

10 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
11 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
12 ; AVX1-NEXT: retq
16 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 ; AVX2-NEXT: retq
25 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
26 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
27 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
28 ; AVX1-NEXT: retq
32 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
33 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 ; AVX2-NEXT: retq
42 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
43 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
44 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
45 ; AVX1-NEXT: retq
49 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
50 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 ; AVX2-NEXT: retq
59 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
60 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
61 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
62 ; AVX1-NEXT: retq
66 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
67 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 ; AVX2-NEXT: retq
76 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
77 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
78 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
79 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
80 ; AVX1-NEXT: retq
84 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
85 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 ; AVX2-NEXT: retq
94 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
95 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
96 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
97 ; AVX1-NEXT: retq
101 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
102 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 ; AVX2-NEXT: retq
111 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
112 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
113 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
114 ; AVX1-NEXT: retq
118 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
119 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 ; AVX2-NEXT: retq
128 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
129 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
130 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
131 ; AVX1-NEXT: retq
135 ; AVX2-NEXT: movl $7, %eax
136 ; AVX2-NEXT: vmovd %eax, %xmm1
137 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
138 ; AVX2-NEXT: retq
146 ; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
147 ; ALL-NEXT: retq
155 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
156 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
157 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
158 ; AVX1-NEXT: retq
162 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
163 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
164 ; AVX2-NEXT: retq
172 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
173 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
174 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
175 ; AVX1-NEXT: retq
179 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
180 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
181 ; AVX2-NEXT: retq
189 ; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
190 ; ALL-NEXT: retq
198 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
199 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
200 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
201 ; AVX1-NEXT: retq
205 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
206 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
207 ; AVX2-NEXT: retq
215 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
216 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
217 ; ALL-NEXT: retq
225 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
226 ; ALL-NEXT: retq
234 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
235 ; ALL-NEXT: retq
243 ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
244 ; ALL-NEXT: retq
252 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
253 ; ALL-NEXT: retq
261 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
262 ; ALL-NEXT: retq
270 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
271 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
272 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
273 ; AVX1-NEXT: retq
277 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
278 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
279 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
280 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
281 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
282 ; AVX2-NEXT: retq
290 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
291 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
292 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
293 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
294 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
295 ; AVX1-NEXT: retq
299 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
300 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
301 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
302 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
303 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
304 ; AVX2-NEXT: retq
312 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
313 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
314 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
315 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
316 ; AVX1-NEXT: retq
320 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
321 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
322 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
323 ; AVX2-NEXT: retq
331 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
332 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
333 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
334 ; AVX1-NEXT: retq
338 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
339 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
340 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
341 ; AVX2-NEXT: retq
349 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
350 ; ALL-NEXT: retq
358 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
359 ; ALL-NEXT: retq
367 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
368 ; ALL-NEXT: retq
376 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
377 ; ALL-NEXT: retq
385 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
386 ; ALL-NEXT: retq
394 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
395 ; ALL-NEXT: retq
403 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
404 ; ALL-NEXT: retq
412 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
413 ; ALL-NEXT: retq
421 ; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
422 ; ALL-NEXT: retq
430 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
431 ; ALL-NEXT: retq
439 ; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
440 ; ALL-NEXT: retq
448 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
449 ; ALL-NEXT: retq
457 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
458 ; ALL-NEXT: retq
466 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
467 ; ALL-NEXT: retq
475 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
476 ; ALL-NEXT: retq
484 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
485 ; ALL-NEXT: retq
493 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
494 ; ALL-NEXT: retq
502 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
503 ; ALL-NEXT: retq
511 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
512 ; ALL-NEXT: retq
520 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
521 ; ALL-NEXT: retq
529 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
530 ; ALL-NEXT: retq
538 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
539 ; ALL-NEXT: retq
547 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
548 ; ALL-NEXT: retq
556 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
557 ; ALL-NEXT: retq
565 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
566 ; ALL-NEXT: retq
574 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
575 ; ALL-NEXT: retq
583 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
584 ; ALL-NEXT: retq
592 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
593 ; ALL-NEXT: retq
601 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
602 ; ALL-NEXT: retq
610 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
611 ; ALL-NEXT: retq
619 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
620 ; ALL-NEXT: retq
628 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
629 ; ALL-NEXT: retq
637 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
638 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
639 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
640 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
641 ; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
642 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
643 ; AVX1-NEXT: retq
647 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
648 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
649 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
650 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,1]
651 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
652 ; AVX2-NEXT: retq
660 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
661 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
662 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
663 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
664 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
665 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
666 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
667 ; AVX1-NEXT: retq
671 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
672 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
673 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
674 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
675 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
676 ; AVX2-NEXT: retq
684 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
685 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
686 ; AVX1-NEXT: retq
690 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
691 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
692 ; AVX2-NEXT: retq
700 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
701 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
702 ; ALL-NEXT: retq
710 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
711 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
712 ; ALL-NEXT: retq
720 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
721 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
722 ; ALL-NEXT: retq
730 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
731 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
732 ; ALL-NEXT: retq
740 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
741 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
742 ; ALL-NEXT: retq
750 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
751 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
752 ; ALL-NEXT: retq
760 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
761 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
762 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
763 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
764 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
765 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
766 ; AVX1-NEXT: retq
770 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[1,3,1,3,5,7,5,7]
771 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
772 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,3,5,7,5,7]
773 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
774 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
775 ; AVX2-NEXT: retq
783 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
784 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
785 ; ALL-NEXT: retq
793 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
794 ; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
795 ; ALL-NEXT: retq
803 ; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
804 ; ALL-NEXT: retq
812 ; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
813 ; ALL-NEXT: retq
821 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
822 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
823 ; ALL-NEXT: retq
831 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
832 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
833 ; AVX1-NEXT: retq
837 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
838 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
839 ; AVX2-NEXT: retq
847 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
848 ; ALL-NEXT: retq
856 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
857 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
858 ; ALL-NEXT: retq
866 ; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
867 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
868 ; ALL-NEXT: retq
876 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
877 ; ALL-NEXT: retq
885 ; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
886 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
887 ; ALL-NEXT: retq
895 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
896 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
897 ; AVX1-NEXT: retq
901 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
902 ; AVX2-NEXT: retq
910 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
911 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
912 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
913 ; AVX1-NEXT: retq
917 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
918 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
919 ; AVX2-NEXT: retq
927 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
928 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
929 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
930 ; AVX1-NEXT: retq
934 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
935 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
936 ; AVX2-NEXT: retq
944 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
945 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
946 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
947 ; AVX1-NEXT: retq
951 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
952 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
953 ; AVX2-NEXT: retq
961 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
962 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
963 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
964 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
965 ; AVX1-NEXT: retq
969 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
970 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
971 ; AVX2-NEXT: retq
979 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
980 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
981 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
982 ; AVX1-NEXT: retq
986 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
987 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
988 ; AVX2-NEXT: retq
996 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
997 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
998 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
999 ; AVX1-NEXT: retq
1003 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
1004 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1005 ; AVX2-NEXT: retq
1013 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
1014 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
1015 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
1016 ; AVX1-NEXT: retq
1020 ; AVX2-NEXT: movl $7, %eax
1021 ; AVX2-NEXT: vmovd %eax, %xmm1
1022 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1023 ; AVX2-NEXT: retq
1031 ; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
1032 ; AVX1-NEXT: retq
1036 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1037 ; AVX2-NEXT: retq
1045 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
1046 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1047 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1048 ; AVX1-NEXT: retq
1052 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1053 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1054 ; AVX2-NEXT: retq
1062 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
1063 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1064 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1065 ; AVX1-NEXT: retq
1069 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1070 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1071 ; AVX2-NEXT: retq
1079 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1080 ; AVX1-NEXT: retq
1084 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1085 ; AVX2-NEXT: retq
1093 ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
1094 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1095 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1096 ; AVX1-NEXT: retq
1100 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1101 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1102 ; AVX2-NEXT: retq
1110 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
1111 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1112 ; AVX1-NEXT: retq
1116 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1117 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1118 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1119 ; AVX2-NEXT: retq
1127 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
1128 ; AVX1-NEXT: retq
1132 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1133 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1134 ; AVX2-NEXT: retq
1142 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
1143 ; AVX1-NEXT: retq
1147 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1148 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1149 ; AVX2-NEXT: retq
1157 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
1158 ; AVX1-NEXT: retq
1162 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1163 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1164 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1165 ; AVX2-NEXT: retq
1173 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1174 ; AVX1-NEXT: retq
1178 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1179 ; AVX2-NEXT: retq
1187 ; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1188 ; AVX1-NEXT: retq
1192 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1193 ; AVX2-NEXT: retq
1201 ; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1202 ; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1203 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1204 ; AVX1-NEXT: retq
1208 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1209 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1210 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1211 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1212 ; AVX2-NEXT: retq
1220 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
1221 ; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
1222 ; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1223 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
1224 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1225 ; AVX1-NEXT: retq
1229 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1230 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1231 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1232 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1233 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1234 ; AVX2-NEXT: retq
1242 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
1243 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
1244 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1245 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1246 ; AVX1-NEXT: retq
1250 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1251 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1252 ; AVX2-NEXT: retq
1260 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1261 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1262 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1263 ; AVX1-NEXT: retq
1267 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1268 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1269 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1270 ; AVX2-NEXT: retq
1278 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1279 ; AVX1-NEXT: retq
1283 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1284 ; AVX2-NEXT: retq
1292 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1293 ; AVX1-NEXT: retq
1297 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1298 ; AVX2-NEXT: retq
1306 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1307 ; AVX1-NEXT: retq
1311 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1312 ; AVX2-NEXT: retq
1320 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1321 ; AVX1-NEXT: retq
1325 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1326 ; AVX2-NEXT: retq
1334 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1335 ; AVX1-NEXT: retq
1339 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1340 ; AVX2-NEXT: retq
1348 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1349 ; AVX1-NEXT: retq
1353 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1354 ; AVX2-NEXT: retq
1362 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1363 ; AVX1-NEXT: retq
1367 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1368 ; AVX2-NEXT: retq
1376 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1377 ; AVX1-NEXT: retq
1381 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1382 ; AVX2-NEXT: retq
1390 ; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1391 ; AVX1-NEXT: retq
1395 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1396 ; AVX2-NEXT: retq
1404 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1405 ; AVX1-NEXT: retq
1409 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1410 ; AVX2-NEXT: retq
1418 ; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1419 ; AVX1-NEXT: retq
1423 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1424 ; AVX2-NEXT: retq
1432 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1433 ; AVX1-NEXT: retq
1437 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1438 ; AVX2-NEXT: retq
1446 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1447 ; AVX1-NEXT: retq
1451 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1452 ; AVX2-NEXT: retq
1460 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
1461 ; AVX1-NEXT: retq
1465 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1466 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1467 ; AVX2-NEXT: retq
1475 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
1476 ; AVX1-NEXT: retq
1480 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1481 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1482 ; AVX2-NEXT: retq
1490 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
1491 ; AVX1-NEXT: retq
1495 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1496 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1497 ; AVX2-NEXT: retq
1505 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
1506 ; AVX1-NEXT: retq
1510 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1511 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1512 ; AVX2-NEXT: retq
1520 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
1521 ; AVX1-NEXT: retq
1525 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1526 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1527 ; AVX2-NEXT: retq
1535 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
1536 ; AVX1-NEXT: retq
1540 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1541 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1542 ; AVX2-NEXT: retq
1550 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
1551 ; AVX1-NEXT: retq
1555 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1556 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1557 ; AVX2-NEXT: retq
1565 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
1566 ; AVX1-NEXT: retq
1570 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1571 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1572 ; AVX2-NEXT: retq
1580 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
1581 ; AVX1-NEXT: retq
1585 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1586 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1587 ; AVX2-NEXT: retq
1595 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
1596 ; AVX1-NEXT: retq
1600 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1601 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1602 ; AVX2-NEXT: retq
1610 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
1611 ; AVX1-NEXT: retq
1615 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1616 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1617 ; AVX2-NEXT: retq
1625 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
1626 ; AVX1-NEXT: retq
1630 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1631 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1632 ; AVX2-NEXT: retq
1640 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
1641 ; AVX1-NEXT: retq
1645 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1646 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1647 ; AVX2-NEXT: retq
1655 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
1656 ; AVX1-NEXT: retq
1660 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1661 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1662 ; AVX2-NEXT: retq
1670 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
1671 ; AVX1-NEXT: retq
1675 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1676 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1677 ; AVX2-NEXT: retq
1685 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
1686 ; AVX1-NEXT: retq
1690 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1691 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1692 ; AVX2-NEXT: retq
1700 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
1701 ; AVX1-NEXT: retq
1705 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1706 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1707 ; AVX2-NEXT: retq
1715 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
1716 ; AVX1-NEXT: retq
1720 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1721 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1722 ; AVX2-NEXT: retq
1730 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
1731 ; AVX1-NEXT: retq
1735 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1736 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1737 ; AVX2-NEXT: retq
1745 ; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
1746 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1747 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
1748 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
1749 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1750 ; AVX1-NEXT: retq
1754 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1755 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
1756 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,0,3]
1757 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1758 ; AVX2-NEXT: retq
1766 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
1767 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1768 ; AVX1-NEXT: retq
1772 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1773 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1774 ; AVX2-NEXT: retq
1782 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1783 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1784 ; AVX1-NEXT: retq
1788 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1789 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1790 ; AVX2-NEXT: retq
1798 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1799 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1800 ; AVX1-NEXT: retq
1804 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1805 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1806 ; AVX2-NEXT: retq
1814 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1815 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1816 ; AVX1-NEXT: retq
1820 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1821 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1822 ; AVX2-NEXT: retq
1830 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
1831 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1832 ; AVX1-NEXT: retq
1836 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1837 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1838 ; AVX2-NEXT: retq
1846 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1847 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1848 ; AVX1-NEXT: retq
1852 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1853 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1854 ; AVX2-NEXT: retq
1862 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1863 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1864 ; AVX1-NEXT: retq
1868 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1869 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1870 ; AVX2-NEXT: retq
1878 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1879 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1880 ; AVX1-NEXT: retq
1884 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1885 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1886 ; AVX2-NEXT: retq
1894 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
1895 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1896 ; AVX1-NEXT: retq
1900 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1901 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1902 ; AVX2-NEXT: retq
1910 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
1911 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
1912 ; AVX1-NEXT: retq
1916 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
1917 ; AVX2-NEXT: retq
1925 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
1926 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
1927 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
1928 ; AVX1-NEXT: retq
1932 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
1933 ; AVX2-NEXT: retq
1941 ; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1942 ; AVX1-NEXT: retq
1946 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
1947 ; AVX2-NEXT: retq
1955 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1956 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1957 ; AVX1-NEXT: retq
1961 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1962 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1963 ; AVX2-NEXT: retq
1971 ; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1972 ; ALL-NEXT: retq
1980 ; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1981 ; ALL-NEXT: retq
1989 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
1990 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1991 ; AVX1-NEXT: retq
1995 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
1996 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1997 ; AVX2-NEXT: retq
2005 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2006 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2007 ; AVX1-NEXT: retq
2011 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2012 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2013 ; AVX2-NEXT: retq
2021 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
2022 ; ALL-NEXT: retq
2032 ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
2033 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2034 ; AVX1-NEXT: retq
2038 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
2039 ; AVX2-NEXT: retq
2051 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
2052 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
2053 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
2054 ; AVX1-NEXT: retq
2058 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2059 ; AVX2-NEXT: retq
2067 ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
2068 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2069 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2070 ; AVX1-NEXT: retq
2074 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2075 ; AVX2-NEXT: retq
2083 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
2084 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
2085 ; AVX1-NEXT: retq
2089 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
2090 ; AVX2-NEXT: retq
2098 ; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
2099 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
2100 ; AVX1-NEXT: retq
2104 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
2105 ; AVX2-NEXT: retq
2113 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
2114 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
2115 ; AVX1-NEXT: retq
2119 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
2120 ; AVX2-NEXT: retq
2128 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
2129 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
2130 ; AVX1-NEXT: retq
2134 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
2135 ; AVX2-NEXT: retq
2143 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2144 ; AVX1-NEXT: retq
2148 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2149 ; AVX2-NEXT: retq
2157 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2158 ; AVX1-NEXT: retq
2162 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2163 ; AVX2-NEXT: retq
2171 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2172 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2173 ; ALL-NEXT: retq
2186 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2187 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2188 ; ALL-NEXT: retq
2199 ; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2200 ; ALL-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
2201 ; ALL-NEXT: retq
2213 ; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2214 ; ALL-NEXT: retq
2224 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
2225 ; AVX1-NEXT: retq
2229 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2230 ; AVX2-NEXT: retq
2240 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2241 ; ALL-NEXT: retq
2254 ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2255 ; ALL-NEXT: retq
2270 ; ALL-NEXT: vbroadcastss (%rdi), %ymm0
2271 ; ALL-NEXT: retq