Home | History | Annotate | Download | only in X86

Lines Matching full:next

8 ; AVX512F-NEXT:    vbroadcastsd %xmm0, %zmm0
9 ; AVX512F-NEXT: retq
13 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
14 ; AVX512F-32-NEXT: retl
22 ; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm0
23 ; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
24 ; AVX512F-NEXT: retq
28 ; AVX512F-32-NEXT: vextractf32x4 $1, %zmm0, %xmm0
29 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
30 ; AVX512F-32-NEXT: retl
38 ; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0
39 ; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
40 ; AVX512F-NEXT: retq
44 ; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0
45 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
46 ; AVX512F-32-NEXT: retl
54 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
55 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
56 ; AVX512F-NEXT: retq
60 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
61 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
62 ; AVX512F-32-NEXT: retl
70 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
71 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
72 ; AVX512F-NEXT: retq
76 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
77 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
78 ; AVX512F-32-NEXT: retl
86 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
87 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
88 ; AVX512F-NEXT: retq
92 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
93 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
94 ; AVX512F-32-NEXT: retl
102 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
103 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
104 ; AVX512F-NEXT: retq
108 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
109 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
110 ; AVX512F-32-NEXT: retl
118 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
119 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
120 ; AVX512F-NEXT: retq
124 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
125 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
126 ; AVX512F-32-NEXT: retl
134 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
135 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
136 ; AVX512F-NEXT: retq
140 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
141 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
142 ; AVX512F-32-NEXT: retl
150 ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
151 ; AVX512F-NEXT: movl $7, %eax
152 ; AVX512F-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
153 ; AVX512F-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
154 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
155 ; AVX512F-NEXT: retq
159 ; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
160 ; AVX512F-32-NEXT: movl $7, %eax
161 ; AVX512F-32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
162 ; AVX512F-32-NEXT: vpxord %zmm2, %zmm2, %zmm2
163 ; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
164 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
165 ; AVX512F-32-NEXT: retl
173 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
174 ; AVX512F-NEXT: retq
178 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
179 ; AVX512F-32-NEXT: retl
187 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
188 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
189 ; AVX512F-NEXT: retq
193 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
194 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
195 ; AVX512F-32-NEXT: retl
203 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
204 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
205 ; AVX512F-NEXT: retq
209 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
210 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
211 ; AVX512F-32-NEXT: retl
220 ; AVX512F-NEXT: vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
221 ; AVX512F-NEXT: retq
225 ; AVX512F-32-NEXT: vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
226 ; AVX512F-32-NEXT: retl
235 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
236 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
237 ; AVX512F-NEXT: retq
241 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
242 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
243 ; AVX512F-32-NEXT: retl
252 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
253 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
254 ; AVX512F-NEXT: retq
258 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
259 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
260 ; AVX512F-32-NEXT: retl
269 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
270 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
271 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
272 ; AVX512F-NEXT: retq
276 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
277 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
278 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
279 ; AVX512F-32-NEXT: retl
288 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
289 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
290 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
291 ; AVX512F-NEXT: retq
295 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
296 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
297 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
298 ; AVX512F-32-NEXT: retl
307 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
308 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
309 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
310 ; AVX512F-NEXT: retq
314 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
315 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
316 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
317 ; AVX512F-32-NEXT: retl
326 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
327 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
328 ; AVX512F-NEXT: retq
332 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
333 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
334 ; AVX512F-32-NEXT: retl
343 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
344 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
345 ; AVX512F-NEXT: retq
349 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
350 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
351 ; AVX512F-32-NEXT: retl
360 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
361 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
362 ; AVX512F-NEXT: retq
366 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
367 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
368 ; AVX512F-32-NEXT: retl
377 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
378 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
379 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
380 ; AVX512F-NEXT: retq
384 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
385 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
386 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
387 ; AVX512F-32-NEXT: retl
396 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
397 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
398 ; AVX512F-NEXT: retq
402 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
403 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
404 ; AVX512F-32-NEXT: retl
413 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
414 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
415 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
416 ; AVX512F-NEXT: retq
420 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
421 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
422 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
423 ; AVX512F-32-NEXT: retl
432 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
433 ; AVX512F-NEXT: retq
437 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
438 ; AVX512F-32-NEXT: retl
447 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
448 ; AVX512F-NEXT: retq
452 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
453 ; AVX512F-32-NEXT: retl
462 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
463 ; AVX512F-NEXT: retq
467 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
468 ; AVX512F-32-NEXT: retl
477 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
478 ; AVX512F-NEXT: retq
482 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
483 ; AVX512F-32-NEXT: retl
492 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
493 ; AVX512F-NEXT: retq
497 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
498 ; AVX512F-32-NEXT: retl
507 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
508 ; AVX512F-NEXT: retq
512 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
513 ; AVX512F-32-NEXT: retl
522 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
523 ; AVX512F-NEXT: retq
527 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
528 ; AVX512F-32-NEXT: retl
537 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
538 ; AVX512F-NEXT: retq
542 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
543 ; AVX512F-32-NEXT: retl
552 ; AVX512F-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
553 ; AVX512F-NEXT: retq
557 ; AVX512F-32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
558 ; AVX512F-32-NEXT: retl
567 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
568 ; AVX512F-NEXT: retq
572 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6]
573 ; AVX512F-32-NEXT: retl
582 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
583 ; AVX512F-NEXT: retq
587 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7]
588 ; AVX512F-32-NEXT: retl
597 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
598 ; AVX512F-NEXT: retq
602 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
603 ; AVX512F-32-NEXT: retl
612 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
613 ; AVX512F-NEXT: retq
617 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
618 ; AVX512F-32-NEXT: retl
627 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
628 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
629 ; AVX512F-NEXT: retq
633 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
634 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
635 ; AVX512F-32-NEXT: retl
644 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
645 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
646 ; AVX512F-NEXT: retq
650 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
651 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
652 ; AVX512F-32-NEXT: retl
661 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
662 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
663 ; AVX512F-NEXT: retq
667 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
668 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
669 ; AVX512F-32-NEXT: retl
678 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
679 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
680 ; AVX512F-NEXT: retq
684 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
685 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
686 ; AVX512F-32-NEXT: retl
695 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
696 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
697 ; AVX512F-NEXT: retq
701 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
702 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
703 ; AVX512F-32-NEXT: retl
712 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
713 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
714 ; AVX512F-NEXT: retq
718 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
719 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
720 ; AVX512F-32-NEXT: retl
729 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
730 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
731 ; AVX512F-NEXT: retq
735 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
736 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
737 ; AVX512F-32-NEXT: retl
746 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
747 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
748 ; AVX512F-NEXT: retq
752 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
753 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
754 ; AVX512F-32-NEXT: retl
763 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
764 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
765 ; AVX512F-NEXT: retq
769 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
770 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
771 ; AVX512F-32-NEXT: retl
780 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
781 ; AVX512F-NEXT: retq
785 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7]
786 ; AVX512F-32-NEXT: retl
795 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
796 ; AVX512F-NEXT: retq
800 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,5,6,7]
801 ; AVX512F-32-NEXT: retl
810 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
811 ; AVX512F-NEXT: retq
815 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,7]
816 ; AVX512F-32-NEXT: retl
825 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
826 ; AVX512F-NEXT: retq
830 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,6,6]
831 ; AVX512F-32-NEXT: retl
840 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
841 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
842 ; AVX512F-NEXT: retq
846 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
847 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
848 ; AVX512F-32-NEXT: retl
857 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
858 ; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
859 ; AVX512F-NEXT: retq
863 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
864 ; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
865 ; AVX512F-32-NEXT: retl
874 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
875 ; AVX512F-NEXT: retq
879 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,6]
880 ; AVX512F-32-NEXT: retl
889 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
890 ; AVX512F-NEXT: retq
894 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[1,1,3,3,4,4,6,7]
895 ; AVX512F-32-NEXT: retl
904 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
905 ; AVX512F-NEXT: retq
909 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,5,4,6,6]
910 ; AVX512F-32-NEXT: retl
919 ; AVX512F-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
920 ; AVX512F-NEXT: retq
924 ; AVX512F-32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,6]
925 ; AVX512F-32-NEXT: retl
934 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
935 ; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
936 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
937 ; AVX512F-NEXT: retq
941 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
942 ; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
943 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
944 ; AVX512F-32-NEXT: retl
953 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,5,1,1,2,3,5,10]
954 ; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
955 ; AVX512F-NEXT: retq
959 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,0,5,0,1,0,1,0,2,0,3,0,5,0,10,0]
960 ; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
961 ; AVX512F-32-NEXT: retl
970 ; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
971 ; AVX512F-NEXT: retq
975 ; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
976 ; AVX512F-32-NEXT: retl
984 ; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0
985 ; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
986 ; AVX512F-NEXT: retq
990 ; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0
991 ; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
992 ; AVX512F-32-NEXT: retl
1000 ; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1001 ; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
1002 ; AVX512F-NEXT: retq
1006 ; AVX512F-32-NEXT: vextracti32x4 $3, %zmm0, %xmm0
1007 ; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
1008 ; AVX512F-32-NEXT: retl
1017 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
1018 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1019 ; AVX512F-NEXT: retq
1023 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
1024 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1025 ; AVX512F-32-NEXT: retl
1034 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
1035 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1036 ; AVX512F-NEXT: retq
1040 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
1041 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1042 ; AVX512F-32-NEXT: retl
1051 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
1052 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1053 ; AVX512F-NEXT: retq
1057 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
1058 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1059 ; AVX512F-32-NEXT: retl
1068 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
1069 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1070 ; AVX512F-NEXT: retq
1074 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
1075 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1076 ; AVX512F-32-NEXT: retl
1085 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
1086 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1087 ; AVX512F-NEXT: retq
1091 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
1092 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1093 ; AVX512F-32-NEXT: retl
1102 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
1103 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1104 ; AVX512F-NEXT: retq
1108 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
1109 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1110 ; AVX512F-32-NEXT: retl
1119 ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
1120 ; AVX512F-NEXT: movl $7, %eax
1121 ; AVX512F-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
1122 ; AVX512F-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
1123 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1124 ; AVX512F-NEXT: retq
1128 ; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
1129 ; AVX512F-32-NEXT: movl $7, %eax
1130 ; AVX512F-32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
1131 ; AVX512F-32-NEXT: vpxord %zmm2, %zmm2, %zmm2
1132 ; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
1133 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1134 ; AVX512F-32-NEXT: retl
1142 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1143 ; AVX512F-NEXT: retq
1147 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
1148 ; AVX512F-32-NEXT: retl
1158 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
1159 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1160 ; AVX512F-NEXT: retq
1164 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
1165 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1166 ; AVX512F-32-NEXT: retl
1175 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
1176 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1177 ; AVX512F-NEXT: retq
1181 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
1182 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1183 ; AVX512F-32-NEXT: retl
1192 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
1193 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1194 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1195 ; AVX512F-NEXT: retq
1199 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
1200 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1201 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1202 ; AVX512F-32-NEXT: retl
1211 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
1212 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1213 ; AVX512F-NEXT: retq
1217 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
1218 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1219 ; AVX512F-32-NEXT: retl
1228 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
1229 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1230 ; AVX512F-NEXT: retq
1234 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
1235 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1236 ; AVX512F-32-NEXT: retl
1245 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
1246 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1247 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1248 ; AVX512F-NEXT: retq
1252 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
1253 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1254 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1255 ; AVX512F-32-NEXT: retl
1264 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
1265 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1266 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1267 ; AVX512F-NEXT: retq
1271 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
1272 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1273 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1274 ; AVX512F-32-NEXT: retl
1283 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
1284 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1285 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1286 ; AVX512F-NEXT: retq
1290 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
1291 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1292 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1293 ; AVX512F-32-NEXT: retl
1302 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
1303 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1304 ; AVX512F-NEXT: retq
1308 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
1309 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1310 ; AVX512F-32-NEXT: retl
1319 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
1320 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1321 ; AVX512F-NEXT: retq
1325 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
1326 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1327 ; AVX512F-32-NEXT: retl
1336 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
1337 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1338 ; AVX512F-NEXT: retq
1342 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
1343 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1344 ; AVX512F-32-NEXT: retl
1353 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
1354 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1355 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1356 ; AVX512F-NEXT: retq
1360 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
1361 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1362 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1363 ; AVX512F-32-NEXT: retl
1372 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
1373 ; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1374 ; AVX512F-NEXT: retq
1378 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
1379 ; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
1380 ; AVX512F-32-NEXT: retl
1389 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
1390 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1391 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1392 ; AVX512F-NEXT: retq
1396 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
1397 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1398 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1399 ; AVX512F-32-NEXT: retl
1408 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
1409 ; AVX512F-NEXT: retq
1413 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
1414 ; AVX512F-32-NEXT: retl
1423 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
1424 ; AVX512F-NEXT: retq
1428 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
1429 ; AVX512F-32-NEXT: retl
1438 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
1439 ; AVX512F-NEXT: retq
1443 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
1444 ; AVX512F-32-NEXT: retl
1453 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
1454 ; AVX512F-NEXT: retq
1458 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
1459 ; AVX512F-32-NEXT: retl
1468 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
1469 ; AVX512F-NEXT: retq
1473 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
1474 ; AVX512F-32-NEXT: retl
1483 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
1484 ; AVX512F-NEXT: retq
1488 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
1489 ; AVX512F-32-NEXT: retl
1498 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
1499 ; AVX512F-NEXT: retq
1503 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
1504 ; AVX512F-32-NEXT: retl
1513 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
1514 ; AVX512F-NEXT: retq
1518 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
1519 ; AVX512F-32-NEXT: retl
1528 ; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
1529 ; AVX512F-NEXT: retq
1533 ; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13]
1534 ; AVX512F-32-NEXT: retl
1543 ; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
1544 ; AVX512F-NEXT: retq
1548 ; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
1549 ; AVX512F-32-NEXT: retl
1558 ; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
1559 ; AVX512F-NEXT: retq
1563 ; AVX512F-32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
1564 ; AVX512F-32-NEXT: retl
1573 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
1574 ; AVX512F-NEXT: retq
1578 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
1579 ; AVX512F-32-NEXT: retl
1588 ; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
1589 ; AVX512F-NEXT: retq
1593 ; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
1594 ; AVX512F-32-NEXT: retl
1603 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
1604 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1605 ; AVX512F-NEXT: retq
1609 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
1610 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1611 ; AVX512F-32-NEXT: retl
1620 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
1621 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1622 ; AVX512F-NEXT: retq
1626 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
1627 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1628 ; AVX512F-32-NEXT: retl
1637 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
1638 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1639 ; AVX512F-NEXT: retq
1643 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
1644 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1645 ; AVX512F-32-NEXT: retl
1654 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
1655 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1656 ; AVX512F-NEXT: retq
1660 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
1661 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1662 ; AVX512F-32-NEXT: retl
1671 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
1672 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1673 ; AVX512F-NEXT: retq
1677 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
1678 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1679 ; AVX512F-32-NEXT: retl
1688 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
1689 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1690 ; AVX512F-NEXT: retq
1694 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
1695 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1696 ; AVX512F-32-NEXT: retl
1705 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
1706 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1707 ; AVX512F-NEXT: retq
1711 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
1712 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1713 ; AVX512F-32-NEXT: retl
1722 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
1723 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1724 ; AVX512F-NEXT: retq
1728 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
1729 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1730 ; AVX512F-32-NEXT: retl
1739 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
1740 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1741 ; AVX512F-NEXT: retq
1745 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
1746 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1747 ; AVX512F-32-NEXT: retl
1756 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
1757 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1758 ; AVX512F-NEXT: retq
1762 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
1763 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1764 ; AVX512F-32-NEXT: retl
1773 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
1774 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1775 ; AVX512F-NEXT: retq
1779 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
1780 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1781 ; AVX512F-32-NEXT: retl
1790 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
1791 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1792 ; AVX512F-NEXT: retq
1796 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
1797 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1798 ; AVX512F-32-NEXT: retl
1807 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
1808 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1809 ; AVX512F-NEXT: retq
1813 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
1814 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1815 ; AVX512F-32-NEXT: retl
1824 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
1825 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1826 ; AVX512F-NEXT: retq
1830 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
1831 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1832 ; AVX512F-32-NEXT: retl
1841 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
1842 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1843 ; AVX512F-NEXT: retq
1847 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
1848 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1849 ; AVX512F-32-NEXT: retl
1858 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
1859 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1860 ; AVX512F-NEXT: retq
1864 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
1865 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1866 ; AVX512F-32-NEXT: retl
1875 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
1876 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1877 ; AVX512F-NEXT: retq
1881 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
1882 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1883 ; AVX512F-32-NEXT: retl
1892 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
1893 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1894 ; AVX512F-NEXT: retq
1898 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
1899 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1900 ; AVX512F-32-NEXT: retl
1909 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
1910 ; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
1911 ; AVX512F-NEXT: retq
1915 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
1916 ; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
1917 ; AVX512F-32-NEXT: retl
1926 ; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
1927 ; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1928 ; AVX512F-NEXT: vmovaps %zmm1, %zmm0
1929 ; AVX512F-NEXT: retq
1933 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
1934 ; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
1935 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1936 ; AVX512F-32-NEXT: retl
1945 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1946 ; AVX512F-NEXT: retq
1950 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1951 ; AVX512F-32-NEXT: retl
1960 ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
1961 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1962 ; AVX512F-NEXT: retq
1966 ; AVX512F-32-NEXT: vpxord %zmm1, %zmm1, %zmm1
1967 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1968 ; AVX512F-32-NEXT: retl
1977 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1978 ; AVX512F-NEXT: retq
1982 ; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1983 ; AVX512F-32-NEXT: retl
1992 ; AVX512F-NEXT: vpxord %zmm0, %zmm0, %zmm0
1993 ; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1994 ; AVX512F-NEXT: retq
1998 ; AVX512F-32-NEXT: vpxord %zmm0, %zmm0, %zmm0
1999 ; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
2000 ; AVX512F-32-NEXT: retl
2009 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2010 ; AVX512F-NEXT: retq
2014 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2015 ; AVX512F-32-NEXT: retl
2024 ; AVX512F-NEXT: vpxord %zmm0, %zmm0, %zmm0
2025 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2026 ; AVX512F-NEXT: retq
2030 ; AVX512F-32-NEXT: vpxord %zmm0, %zmm0, %zmm0
2031 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2032 ; AVX512F-32-NEXT: retl
2041 ; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2042 ; AVX512F-NEXT: retq
2046 ; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2047 ; AVX512F-32-NEXT: retl
2056 ; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
2057 ; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2058 ; AVX512F-NEXT: retq
2062 ; AVX512F-32-NEXT: vpxord %zmm1, %zmm1, %zmm1
2063 ; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2064 ; AVX512F-32-NEXT: retl
2072 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2073 ; AVX512F-NEXT: retq
2077 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
2078 ; AVX512F-32-NEXT: retl
2086 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
2087 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
2088 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2089 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2090 ; AVX512F-NEXT: retq
2094 ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
2095 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
2096 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2097 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2098 ; AVX512F-32-NEXT: retl
2107 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
2108 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
2109 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
2110 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2111 ; AVX512F-NEXT: retq
2115 ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
2116 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
2117 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
2118 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
2119 ; AVX512F-32-NEXT: retl
2128 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2129 ; AVX512F-NEXT: retq
2133 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2134 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
2135 ; AVX512F-32-NEXT: retl
2144 ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
2145 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
2146 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2147 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
2148 ; AVX512F-NEXT: retq
2152 ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
2153 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
2154 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
2155 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2156 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
2157 ; AVX512F-32-NEXT: retl
2167 ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
2168 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
2169 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
2170 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
2171 ; AVX512F-NEXT: retq
2175 ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
2176 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
2177 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
2178 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2179 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
2180 ; AVX512F-32-NEXT: retl
2190 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2191 ; AVX512F-NEXT: retq
2195 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
2196 ; AVX512F-32-NEXT: retl
2204 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,4,5,6,7]
2205 ; AVX512F-NEXT: retq
2209 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,4,5,6,7]
2210 ; AVX512F-32-NEXT: retl
2218 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,0,1,6,7]
2219 ; AVX512F-NEXT: retq
2223 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1,0,1,6,7]
2224 ; AVX512F-32-NEXT: retl
2232 ; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2233 ; AVX512F-NEXT: retq
2237 ; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2238 ; AVX512F-32-NEXT: retl
2246 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
2247 ; AVX512F-NEXT: retq
2251 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1]
2252 ; AVX512F-32-NEXT: retl