
Lines Matching full:next

10 ; X32-NEXT: vpbroadcastd %xmm0, %xmm0
11 ; X32-NEXT: retl
15 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
16 ; X64-NEXT: retq
26 ; X32-NEXT: pushl %eax
27 ; X32-NEXT: .Ltmp0:
28 ; X32-NEXT: .cfi_def_cfa_offset 8
29 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
30 ; X32-NEXT: andb $15, %al
31 ; X32-NEXT: movb %al, (%esp)
32 ; X32-NEXT: movzbl (%esp), %eax
33 ; X32-NEXT: kmovw %eax, %k1
34 ; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
35 ; X32-NEXT: popl %eax
36 ; X32-NEXT: retl
40 ; X64-NEXT: andb $15, %dil
41 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
42 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
43 ; X64-NEXT: kmovw %eax, %k1
44 ; X64-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
45 ; X64-NEXT: retq
59 ; X32-NEXT: pushl %eax
60 ; X32-NEXT: .Ltmp1:
61 ; X32-NEXT: .cfi_def_cfa_offset 8
62 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
63 ; X32-NEXT: andb $15, %al
64 ; X32-NEXT: movb %al, (%esp)
65 ; X32-NEXT: movzbl (%esp), %eax
66 ; X32-NEXT: kmovw %eax, %k1
67 ; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
68 ; X32-NEXT: popl %eax
69 ; X32-NEXT: retl
73 ; X64-NEXT: andb $15, %dil
74 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
75 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
76 ; X64-NEXT: kmovw %eax, %k1
77 ; X64-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
78 ; X64-NEXT: retq
91 ; X32-NEXT: vpbroadcastd %xmm0, %ymm0
92 ; X32-NEXT: retl
96 ; X64-NEXT: vpbroadcastd %xmm0, %ymm0
97 ; X64-NEXT: retq
107 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
108 ; X32-NEXT: kmovw %eax, %k1
109 ; X32-NEXT: vpbroadcastd %xmm1, %ymm0 {%k1}
110 ; X32-NEXT: retl
114 ; X64-NEXT: kmovw %edi, %k1
115 ; X64-NEXT: vpbroadcastd %xmm1, %ymm0 {%k1}
116 ; X64-NEXT: retq
129 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
130 ; X32-NEXT: kmovw %eax, %k1
131 ; X32-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z}
132 ; X32-NEXT: retl
136 ; X64-NEXT: kmovw %edi, %k1
137 ; X64-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z}
138 ; X64-NEXT: retq
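
The vpbroadcastd checks above have the shape one would expect from the AVX-512VL masked 32-bit broadcast intrinsics. For the 4-lane xmm forms only the low mask bits are live, so the mask byte is narrowed with andb $15, bounced through a stack slot, and loaded into %k1; for the 8-lane ymm forms the whole __mmask8 is live and goes straight to kmovw. A minimal C sketch of calls that should produce this pattern when built with something like clang -O2 -mavx512vl; the intrinsics are standard immintrin.h, but the demo_* wrappers and argument names are illustrative, not taken from the test file:

  #include <immintrin.h>

  /* Merge masking: k-selected lanes get element 0 of b, the rest keep a.
     Expected lowering: vpbroadcastd %xmm1, %xmm0 {%k1} */
  __m128i demo_mask_broadcastd_128(__m128i a, __mmask8 k, __m128i b) {
    return _mm_mask_broadcastd_epi32(a, k, b);
  }

  /* Zero masking, 256-bit: unselected lanes become 0.
     Expected lowering: vpbroadcastd %xmm0, %ymm0 {%k1} {z} */
  __m256i demo_maskz_broadcastd_256(__mmask8 k, __m128i a) {
    return _mm256_maskz_broadcastd_epi32(k, a);
  }
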
150 ; X32-NEXT: vpbroadcastq %xmm0, %xmm0
151 ; X32-NEXT: retl
155 ; X64-NEXT: vpbroadcastq %xmm0, %xmm0
156 ; X64-NEXT: retq
164 ; X32-NEXT: pushl %eax
165 ; X32-NEXT: .Ltmp2:
166 ; X32-NEXT: .cfi_def_cfa_offset 8
167 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
168 ; X32-NEXT: andb $3, %al
169 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
170 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
171 ; X32-NEXT: kmovw %eax, %k1
172 ; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
173 ; X32-NEXT: popl %eax
174 ; X32-NEXT: retl
178 ; X64-NEXT: andb $3, %dil
179 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
180 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
181 ; X64-NEXT: kmovw %eax, %k1
182 ; X64-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
183 ; X64-NEXT: retq
194 ; X32-NEXT: pushl %eax
195 ; X32-NEXT: .Ltmp3:
196 ; X32-NEXT: .cfi_def_cfa_offset 8
197 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
198 ; X32-NEXT: andb $3, %al
199 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
200 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
201 ; X32-NEXT: kmovw %eax, %k1
202 ; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
203 ; X32-NEXT: popl %eax
204 ; X32-NEXT: retl
208 ; X64-NEXT: andb $3, %dil
209 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
210 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
211 ; X64-NEXT: kmovw %eax, %k1
212 ; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
213 ; X64-NEXT: retq
224 ; X32-NEXT: vpbroadcastq %xmm0, %ymm0
225 ; X32-NEXT: retl
229 ; X64-NEXT: vpbroadcastq %xmm0, %ymm0
230 ; X64-NEXT: retq
238 ; X32-NEXT: pushl %eax
239 ; X32-NEXT: .Ltmp4:
240 ; X32-NEXT: .cfi_def_cfa_offset 8
241 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
242 ; X32-NEXT: andb $15, %al
243 ; X32-NEXT: movb %al, (%esp)
244 ; X32-NEXT: movzbl (%esp), %eax
245 ; X32-NEXT: kmovw %eax, %k1
246 ; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
247 ; X32-NEXT: popl %eax
248 ; X32-NEXT: retl
252 ; X64-NEXT: andb $15, %dil
253 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
254 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
255 ; X64-NEXT: kmovw %eax, %k1
256 ; X64-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
257 ; X64-NEXT: retq
268 ; X32-NEXT: pushl %eax
269 ; X32-NEXT: .Ltmp5:
270 ; X32-NEXT: .cfi_def_cfa_offset 8
271 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
272 ; X32-NEXT: andb $15, %al
273 ; X32-NEXT: movb %al, (%esp)
274 ; X32-NEXT: movzbl (%esp), %eax
275 ; X32-NEXT: kmovw %eax, %k1
276 ; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
277 ; X32-NEXT: popl %eax
278 ; X32-NEXT: retl
282 ; X64-NEXT: andb $15, %dil
283 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
284 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
285 ; X64-NEXT: kmovw %eax, %k1
286 ; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
287 ; X64-NEXT: retq
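
The vpbroadcastq blocks are the 64-bit analogue: the mask is narrowed with andb $3 for the two xmm lanes and andb $15 for the four ymm lanes before reaching %k1. A hedged sketch in the same style (wrapper names illustrative):

  #include <immintrin.h>

  /* Expected: vpbroadcastq %xmm1, %xmm0 {%k1} */
  __m128i demo_mask_broadcastq_128(__m128i a, __mmask8 k, __m128i b) {
    return _mm_mask_broadcastq_epi64(a, k, b);
  }

  /* Expected: vpbroadcastq %xmm0, %ymm0 {%k1} {z} */
  __m256i demo_maskz_broadcastq_256(__mmask8 k, __m128i a) {
    return _mm256_maskz_broadcastq_epi64(k, a);
  }
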
298 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
299 ; X32-NEXT: retl
303 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
304 ; X64-NEXT: retq
312 ; X32-NEXT: pushl %eax
313 ; X32-NEXT: .Ltmp6:
314 ; X32-NEXT: .cfi_def_cfa_offset 8
315 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
316 ; X32-NEXT: andb $3, %al
317 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
318 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
319 ; X32-NEXT: kmovw %eax, %k1
320 ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
321 ; X32-NEXT: popl %eax
322 ; X32-NEXT: retl
326 ; X64-NEXT: andb $3, %dil
327 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
328 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
329 ; X64-NEXT: kmovw %eax, %k1
330 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
331 ; X64-NEXT: retq
342 ; X32-NEXT: pushl %eax
343 ; X32-NEXT: .Ltmp7:
344 ; X32-NEXT: .cfi_def_cfa_offset 8
345 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
346 ; X32-NEXT: andb $3, %al
347 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
348 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
349 ; X32-NEXT: kmovw %eax, %k1
350 ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
351 ; X32-NEXT: popl %eax
352 ; X32-NEXT: retl
356 ; X64-NEXT: andb $3, %dil
357 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
358 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
359 ; X64-NEXT: kmovw %eax, %k1
360 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
361 ; X64-NEXT: retq
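
A 128-bit double broadcast has no vbroadcastsd form, so it lowers to vmovddup, duplicating lane 0 into both lanes (xmm0[0,0]). Whatever IR idiom the test actually uses, _mm_mask_movedup_pd is one intrinsic that compiles to exactly this masked pattern (sketch only, wrapper name illustrative):

  #include <immintrin.h>

  /* Expected: vmovddup {{.*}} xmm0 {%k1} = xmm1[0,0] */
  __m128d demo_mask_movedup_128(__m128d a, __mmask8 k, __m128d b) {
    return _mm_mask_movedup_pd(a, k, b);
  }
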
372 ; X32-NEXT: vbroadcastsd %xmm0, %ymm0
373 ; X32-NEXT: retl
377 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
378 ; X64-NEXT: retq
386 ; X32-NEXT: pushl %eax
387 ; X32-NEXT: .Ltmp8:
388 ; X32-NEXT: .cfi_def_cfa_offset 8
389 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
390 ; X32-NEXT: andb $15, %al
391 ; X32-NEXT: movb %al, (%esp)
392 ; X32-NEXT: movzbl (%esp), %eax
393 ; X32-NEXT: kmovw %eax, %k1
394 ; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
395 ; X32-NEXT: popl %eax
396 ; X32-NEXT: retl
400 ; X64-NEXT: andb $15, %dil
401 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
402 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
403 ; X64-NEXT: kmovw %eax, %k1
404 ; X64-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
405 ; X64-NEXT: retq
416 ; X32-NEXT: pushl %eax
417 ; X32-NEXT: .Ltmp9:
418 ; X32-NEXT: .cfi_def_cfa_offset 8
419 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
420 ; X32-NEXT: andb $15, %al
421 ; X32-NEXT: movb %al, (%esp)
422 ; X32-NEXT: movzbl (%esp), %eax
423 ; X32-NEXT: kmovw %eax, %k1
424 ; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
425 ; X32-NEXT: popl %eax
426 ; X32-NEXT: retl
430 ; X64-NEXT: andb $15, %dil
431 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
432 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
433 ; X64-NEXT: kmovw %eax, %k1
434 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
435 ; X64-NEXT: retq
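
At 256 bits the double broadcast does get a dedicated instruction, vbroadcastsd. A sketch of the matching AVX-512VL intrinsic (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vbroadcastsd %xmm1, %ymm0 {%k1} */
  __m256d demo_mask_broadcastsd_256(__m256d a, __mmask8 k, __m128d b) {
    return _mm256_mask_broadcastsd_pd(a, k, b);
  }
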
446 ; X32-NEXT: vbroadcastss %xmm0, %xmm0
447 ; X32-NEXT: retl
451 ; X64-NEXT: vbroadcastss %xmm0, %xmm0
452 ; X64-NEXT: retq
460 ; X32-NEXT: pushl %eax
461 ; X32-NEXT: .Ltmp10:
462 ; X32-NEXT: .cfi_def_cfa_offset 8
463 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
464 ; X32-NEXT: andb $15, %al
465 ; X32-NEXT: movb %al, (%esp)
466 ; X32-NEXT: movzbl (%esp), %eax
467 ; X32-NEXT: kmovw %eax, %k1
468 ; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
469 ; X32-NEXT: popl %eax
470 ; X32-NEXT: retl
474 ; X64-NEXT: andb $15, %dil
475 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
476 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
477 ; X64-NEXT: kmovw %eax, %k1
478 ; X64-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
479 ; X64-NEXT: retq
490 ; X32-NEXT: pushl %eax
491 ; X32-NEXT: .Ltmp11:
492 ; X32-NEXT: .cfi_def_cfa_offset 8
493 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
494 ; X32-NEXT: andb $15, %al
495 ; X32-NEXT: movb %al, (%esp)
496 ; X32-NEXT: movzbl (%esp), %eax
497 ; X32-NEXT: kmovw %eax, %k1
498 ; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
499 ; X32-NEXT: popl %eax
500 ; X32-NEXT: retl
504 ; X64-NEXT: andb $15, %dil
505 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
506 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
507 ; X64-NEXT: kmovw %eax, %k1
508 ; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
509 ; X64-NEXT: retq
520 ; X32-NEXT: vbroadcastss %xmm0, %ymm0
521 ; X32-NEXT: retl
525 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
526 ; X64-NEXT: retq
534 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
535 ; X32-NEXT: kmovw %eax, %k1
536 ; X32-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
537 ; X32-NEXT: retl
541 ; X64-NEXT: kmovw %edi, %k1
542 ; X64-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
543 ; X64-NEXT: retq
553 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
554 ; X32-NEXT: kmovw %eax, %k1
555 ; X32-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
556 ; X32-NEXT: retl
560 ; X64-NEXT: kmovw %edi, %k1
561 ; X64-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
562 ; X64-NEXT: retq
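
The float broadcasts mirror the integer ones with vbroadcastss; again the 4-lane xmm path spills the narrowed mask through the stack while the 8-lane ymm path feeds the mask register directly. Hedged sketch:

  #include <immintrin.h>

  /* Expected: vbroadcastss %xmm1, %xmm0 {%k1} */
  __m128 demo_mask_broadcastss_128(__m128 a, __mmask8 k, __m128 b) {
    return _mm_mask_broadcastss_ps(a, k, b);
  }

  /* Expected: vbroadcastss %xmm0, %ymm0 {%k1} {z} */
  __m256 demo_maskz_broadcastss_256(__mmask8 k, __m128 a) {
    return _mm256_maskz_broadcastss_ps(k, a);
  }
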
572 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
573 ; X32-NEXT: retl
577 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
578 ; X64-NEXT: retq
586 ; X32-NEXT: pushl %eax
587 ; X32-NEXT: .Ltmp12:
588 ; X32-NEXT: .cfi_def_cfa_offset 8
589 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
590 ; X32-NEXT: andb $3, %al
591 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
592 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
593 ; X32-NEXT: kmovw %eax, %k1
594 ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
595 ; X32-NEXT: popl %eax
596 ; X32-NEXT: retl
600 ; X64-NEXT: andb $3, %dil
601 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
602 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
603 ; X64-NEXT: kmovw %eax, %k1
604 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
605 ; X64-NEXT: retq
616 ; X32-NEXT: pushl %eax
617 ; X32-NEXT: .Ltmp13:
618 ; X32-NEXT: .cfi_def_cfa_offset 8
619 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
620 ; X32-NEXT: andb $3, %al
621 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
622 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
623 ; X32-NEXT: kmovw %eax, %k1
624 ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
625 ; X32-NEXT: popl %eax
626 ; X32-NEXT: retl
630 ; X64-NEXT: andb $3, %dil
631 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
632 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
633 ; X64-NEXT: kmovw %eax, %k1
634 ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
635 ; X64-NEXT: retq
646 ; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
647 ; X32-NEXT: retl
651 ; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
652 ; X64-NEXT: retq
660 ; X32-NEXT: pushl %eax
661 ; X32-NEXT: .Ltmp14:
662 ; X32-NEXT: .cfi_def_cfa_offset 8
663 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
664 ; X32-NEXT: andb $15, %al
665 ; X32-NEXT: movb %al, (%esp)
666 ; X32-NEXT: movzbl (%esp), %eax
667 ; X32-NEXT: kmovw %eax, %k1
668 ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
669 ; X32-NEXT: popl %eax
670 ; X32-NEXT: retl
674 ; X64-NEXT: andb $15, %dil
675 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
676 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
677 ; X64-NEXT: kmovw %eax, %k1
678 ; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
679 ; X64-NEXT: retq
690 ; X32-NEXT: pushl %eax
691 ; X32-NEXT: .Ltmp15:
692 ; X32-NEXT: .cfi_def_cfa_offset 8
693 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
694 ; X32-NEXT: andb $15, %al
695 ; X32-NEXT: movb %al, (%esp)
696 ; X32-NEXT: movzbl (%esp), %eax
697 ; X32-NEXT: kmovw %eax, %k1
698 ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
699 ; X32-NEXT: popl %eax
700 ; X32-NEXT: retl
704 ; X64-NEXT: andb $15, %dil
705 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
706 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
707 ; X64-NEXT: kmovw %eax, %k1
708 ; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
709 ; X64-NEXT: retq
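
The 256-bit vmovddup duplicates the even double within each 128-bit half (ymm0[0,0,2,2]) rather than broadcasting across the whole register. A sketch of the corresponding masked intrinsic (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vmovddup {{.*}} ymm0 {%k1} {z} = ymm0[0,0,2,2] */
  __m256d demo_maskz_movedup_256(__mmask8 k, __m256d a) {
    return _mm256_maskz_movedup_pd(k, a);
  }
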
720 ; X32-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
721 ; X32-NEXT: retl
725 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
726 ; X64-NEXT: retq
734 ; X32-NEXT: pushl %eax
735 ; X32-NEXT: .Ltmp16:
736 ; X32-NEXT: .cfi_def_cfa_offset 8
737 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
738 ; X32-NEXT: andb $15, %al
739 ; X32-NEXT: movb %al, (%esp)
740 ; X32-NEXT: movzbl (%esp), %eax
741 ; X32-NEXT: kmovw %eax, %k1
742 ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
743 ; X32-NEXT: popl %eax
744 ; X32-NEXT: retl
748 ; X64-NEXT: andb $15, %dil
749 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
750 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
751 ; X64-NEXT: kmovw %eax, %k1
752 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
753 ; X64-NEXT: retq
764 ; X32-NEXT: pushl %eax
765 ; X32-NEXT: .Ltmp17:
766 ; X32-NEXT: .cfi_def_cfa_offset 8
767 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
768 ; X32-NEXT: andb $15, %al
769 ; X32-NEXT: movb %al, (%esp)
770 ; X32-NEXT: movzbl (%esp), %eax
771 ; X32-NEXT: kmovw %eax, %k1
772 ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
773 ; X32-NEXT: popl %eax
774 ; X32-NEXT: retl
778 ; X64-NEXT: andb $15, %dil
779 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
780 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
781 ; X64-NEXT: kmovw %eax, %k1
782 ; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
783 ; X64-NEXT: retq
794 ; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
795 ; X32-NEXT: retl
799 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
800 ; X64-NEXT: retq
808 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
809 ; X32-NEXT: kmovw %eax, %k1
810 ; X32-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
811 ; X32-NEXT: retl
815 ; X64-NEXT: kmovw %edi, %k1
816 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
817 ; X64-NEXT: retq
827 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
828 ; X32-NEXT: kmovw %eax, %k1
829 ; X32-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
830 ; X32-NEXT: retl
834 ; X64-NEXT: kmovw %edi, %k1
835 ; X64-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
836 ; X64-NEXT: retq
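
vmovshdup duplicates the odd float lanes (xmm0[1,1,3,3]; ymm0[1,1,3,3,5,5,7,7]). A sketch of the masked movehdup intrinsics that lower this way (wrappers illustrative):

  #include <immintrin.h>

  /* Expected: vmovshdup {{.*}} xmm0 {%k1} = xmm1[1,1,3,3] */
  __m128 demo_mask_movehdup_128(__m128 a, __mmask8 k, __m128 b) {
    return _mm_mask_movehdup_ps(a, k, b);
  }

  /* Expected: vmovshdup {{.*}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] */
  __m256 demo_maskz_movehdup_256(__mmask8 k, __m256 a) {
    return _mm256_maskz_movehdup_ps(k, a);
  }
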
846 ; X32-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
847 ; X32-NEXT: retl
851 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
852 ; X64-NEXT: retq
860 ; X32-NEXT: pushl %eax
861 ; X32-NEXT: .Ltmp18:
862 ; X32-NEXT: .cfi_def_cfa_offset 8
863 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
864 ; X32-NEXT: andb $15, %al
865 ; X32-NEXT: movb %al, (%esp)
866 ; X32-NEXT: movzbl (%esp), %eax
867 ; X32-NEXT: kmovw %eax, %k1
868 ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
869 ; X32-NEXT: popl %eax
870 ; X32-NEXT: retl
874 ; X64-NEXT: andb $15, %dil
875 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
876 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
877 ; X64-NEXT: kmovw %eax, %k1
878 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
879 ; X64-NEXT: retq
890 ; X32-NEXT: pushl %eax
891 ; X32-NEXT: .Ltmp19:
892 ; X32-NEXT: .cfi_def_cfa_offset 8
893 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
894 ; X32-NEXT: andb $15, %al
895 ; X32-NEXT: movb %al, (%esp)
896 ; X32-NEXT: movzbl (%esp), %eax
897 ; X32-NEXT: kmovw %eax, %k1
898 ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
899 ; X32-NEXT: popl %eax
900 ; X32-NEXT: retl
904 ; X64-NEXT: andb $15, %dil
905 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
906 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
907 ; X64-NEXT: kmovw %eax, %k1
908 ; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
909 ; X64-NEXT: retq
920 ; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
921 ; X32-NEXT: retl
925 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
926 ; X64-NEXT: retq
934 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
935 ; X32-NEXT: kmovw %eax, %k1
936 ; X32-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
937 ; X32-NEXT: retl
941 ; X64-NEXT: kmovw %edi, %k1
942 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
943 ; X64-NEXT: retq
953 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
954 ; X32-NEXT: kmovw %eax, %k1
955 ; X32-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
956 ; X32-NEXT: retl
960 ; X64-NEXT: kmovw %edi, %k1
961 ; X64-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
962 ; X64-NEXT: retq
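
vmovsldup is the even-lane counterpart (xmm0[0,0,2,2]; ymm0[0,0,2,2,4,4,6,6]). The same sketch with the moveldup intrinsics (wrappers illustrative):

  #include <immintrin.h>

  /* Expected: vmovsldup {{.*}} xmm0 {%k1} = xmm1[0,0,2,2] */
  __m128 demo_mask_moveldup_128(__m128 a, __mmask8 k, __m128 b) {
    return _mm_mask_moveldup_ps(a, k, b);
  }

  /* Expected: vmovsldup {{.*}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] */
  __m256 demo_maskz_moveldup_256(__mmask8 k, __m256 a) {
    return _mm256_maskz_moveldup_ps(k, a);
  }
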
972 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
973 ; X32-NEXT: retl
977 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
978 ; X64-NEXT: retq
986 ; X32-NEXT: pushl %eax
987 ; X32-NEXT: .Ltmp20:
988 ; X32-NEXT: .cfi_def_cfa_offset 8
989 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
990 ; X32-NEXT: andb $15, %al
991 ; X32-NEXT: movb %al, (%esp)
992 ; X32-NEXT: movzbl (%esp), %eax
993 ; X32-NEXT: kmovw %eax, %k1
994 ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
995 ; X32-NEXT: popl %eax
996 ; X32-NEXT: retl
1000 ; X64-NEXT: andb $15, %dil
1001 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1002 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1003 ; X64-NEXT: kmovw %eax, %k1
1004 ; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1005 ; X64-NEXT: retq
1016 ; X32-NEXT: pushl %eax
1017 ; X32-NEXT: .Ltmp21:
1018 ; X32-NEXT: .cfi_def_cfa_offset 8
1019 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1020 ; X32-NEXT: andb $15, %al
1021 ; X32-NEXT: movb %al, (%esp)
1022 ; X32-NEXT: movzbl (%esp), %eax
1023 ; X32-NEXT: kmovw %eax, %k1
1024 ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1025 ; X32-NEXT: popl %eax
1026 ; X32-NEXT: retl
1030 ; X64-NEXT: andb $15, %dil
1031 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1032 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1033 ; X64-NEXT: kmovw %eax, %k1
1034 ; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1035 ; X64-NEXT: retq
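
vpermq with an immediate selects arbitrary qword lanes; the comment notation decodes the immediate two bits per destination lane, so ymm0[3,0,0,0] is imm 0x03 and ymm1[1,0,0,0] is imm 0x01. A hedged sketch using the AVX-512VL permutex intrinsics (wrappers illustrative):

  #include <immintrin.h>

  /* Expected: vpermq {{.*}} ymm0 = ymm0[3,0,0,0] */
  __m256i demo_permutex_epi64(__m256i a) {
    return _mm256_permutex_epi64(a, 0x03);
  }

  /* Expected: vpermq {{.*}} ymm0 {%k1} = ymm1[1,0,0,0] */
  __m256i demo_mask_permutex_epi64(__m256i a, __mmask8 k, __m256i b) {
    return _mm256_mask_permutex_epi64(a, k, b, 0x01);
  }
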
1046 ; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
1047 ; X32-NEXT: retl
1051 ; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
1052 ; X64-NEXT: retq
1060 ; X32-NEXT: pushl %eax
1061 ; X32-NEXT: .Ltmp22:
1062 ; X32-NEXT: .cfi_def_cfa_offset 8
1063 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1064 ; X32-NEXT: andb $15, %al
1065 ; X32-NEXT: movb %al, (%esp)
1066 ; X32-NEXT: movzbl (%esp), %eax
1067 ; X32-NEXT: kmovw %eax, %k1
1068 ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1069 ; X32-NEXT: popl %eax
1070 ; X32-NEXT: retl
1074 ; X64-NEXT: andb $15, %dil
1075 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1076 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1077 ; X64-NEXT: kmovw %eax, %k1
1078 ; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1079 ; X64-NEXT: retq
1090 ; X32-NEXT: pushl %eax
1091 ; X32-NEXT: .Ltmp23:
1092 ; X32-NEXT: .cfi_def_cfa_offset 8
1093 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1094 ; X32-NEXT: andb $15, %al
1095 ; X32-NEXT: movb %al, (%esp)
1096 ; X32-NEXT: movzbl (%esp), %eax
1097 ; X32-NEXT: kmovw %eax, %k1
1098 ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1099 ; X32-NEXT: popl %eax
1100 ; X32-NEXT: retl
1104 ; X64-NEXT: andb $15, %dil
1105 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1106 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1107 ; X64-NEXT: kmovw %eax, %k1
1108 ; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1109 ; X64-NEXT: retq
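
vpermpd is the floating-point twin of vpermq, with the identical immediate encoding. Sketch (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vpermpd {{.*}} ymm0 {%k1} {z} = ymm0[1,0,0,0] */
  __m256d demo_maskz_permutex_pd(__mmask8 k, __m256d a) {
    return _mm256_maskz_permutex_pd(k, a, 0x01);
  }
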
1120 ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1121 ; X32-NEXT: retl
1125 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1126 ; X64-NEXT: retq
1134 ; X32-NEXT: pushl %eax
1135 ; X32-NEXT: .Ltmp24:
1136 ; X32-NEXT: .cfi_def_cfa_offset 8
1137 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1138 ; X32-NEXT: andb $3, %al
1139 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
1140 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1141 ; X32-NEXT: kmovw %eax, %k1
1142 ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
1143 ; X32-NEXT: popl %eax
1144 ; X32-NEXT: retl
1148 ; X64-NEXT: andb $3, %dil
1149 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1150 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1151 ; X64-NEXT: kmovw %eax, %k1
1152 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
1153 ; X64-NEXT: retq
1164 ; X32-NEXT: pushl %eax
1165 ; X32-NEXT: .Ltmp25:
1166 ; X32-NEXT: .cfi_def_cfa_offset 8
1167 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1168 ; X32-NEXT: andb $3, %al
1169 ; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
1170 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1171 ; X32-NEXT: kmovw %eax, %k1
1172 ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
1173 ; X32-NEXT: popl %eax
1174 ; X32-NEXT: retl
1178 ; X64-NEXT: andb $3, %dil
1179 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1180 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1181 ; X64-NEXT: kmovw %eax, %k1
1182 ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
1183 ; X64-NEXT: retq
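
vunpckhpd interleaves the high doubles of its two sources (result = a[1], b[1]). A sketch of the masked form (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vunpckhpd {{.*}} xmm0 {%k1} = xmm1[1],xmm2[1] */
  __m128d demo_mask_unpackhi_pd(__m128d src, __mmask8 k, __m128d a, __m128d b) {
    return _mm_mask_unpackhi_pd(src, k, a, b);
  }
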
1194 ; X32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1195 ; X32-NEXT: retl
1199 ; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1200 ; X64-NEXT: retq
1208 ; X32-NEXT: pushl %eax
1209 ; X32-NEXT: .Ltmp26:
1210 ; X32-NEXT: .cfi_def_cfa_offset 8
1211 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1212 ; X32-NEXT: andb $15, %al
1213 ; X32-NEXT: movb %al, (%esp)
1214 ; X32-NEXT: movzbl (%esp), %eax
1215 ; X32-NEXT: kmovw %eax, %k1
1216 ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
1217 ; X32-NEXT: popl %eax
1218 ; X32-NEXT: retl
1222 ; X64-NEXT: andb $15, %dil
1223 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1224 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1225 ; X64-NEXT: kmovw %eax, %k1
1226 ; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
1227 ; X64-NEXT: retq
1238 ; X32-NEXT: pushl %eax
1239 ; X32-NEXT: .Ltmp27:
1240 ; X32-NEXT: .cfi_def_cfa_offset 8
1241 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1242 ; X32-NEXT: andb $15, %al
1243 ; X32-NEXT: movb %al, (%esp)
1244 ; X32-NEXT: movzbl (%esp), %eax
1245 ; X32-NEXT: kmovw %eax, %k1
1246 ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1247 ; X32-NEXT: popl %eax
1248 ; X32-NEXT: retl
1252 ; X64-NEXT: andb $15, %dil
1253 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1254 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1255 ; X64-NEXT: kmovw %eax, %k1
1256 ; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1257 ; X64-NEXT: retq
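
For 256-bit vshufpd the immediate supplies one select bit per destination lane, alternating between the two sources; a[1],b[1],a[2],b[2] therefore encodes as imm 0x03 (low two bits set, high two clear). Sketch (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vshufpd {{.*}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] */
  __m256d demo_mask_shuffle_pd(__m256d src, __mmask8 k, __m256d a, __m256d b) {
    return _mm256_mask_shuffle_pd(src, k, a, b, 0x03);
  }
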
1268 ; X32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1269 ; X32-NEXT: retl
1273 ; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1274 ; X64-NEXT: retq
1282 ; X32-NEXT: pushl %eax
1283 ; X32-NEXT: .Ltmp28:
1284 ; X32-NEXT: .cfi_def_cfa_offset 8
1285 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1286 ; X32-NEXT: andb $15, %al
1287 ; X32-NEXT: movb %al, (%esp)
1288 ; X32-NEXT: movzbl (%esp), %eax
1289 ; X32-NEXT: kmovw %eax, %k1
1290 ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
1291 ; X32-NEXT: popl %eax
1292 ; X32-NEXT: retl
1296 ; X64-NEXT: andb $15, %dil
1297 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1298 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1299 ; X64-NEXT: kmovw %eax, %k1
1300 ; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
1301 ; X64-NEXT: retq
1312 ; X32-NEXT: pushl %eax
1313 ; X32-NEXT: .Ltmp29:
1314 ; X32-NEXT: .cfi_def_cfa_offset 8
1315 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1316 ; X32-NEXT: andb $15, %al
1317 ; X32-NEXT: movb %al, (%esp)
1318 ; X32-NEXT: movzbl (%esp), %eax
1319 ; X32-NEXT: kmovw %eax, %k1
1320 ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
1321 ; X32-NEXT: popl %eax
1322 ; X32-NEXT: retl
1326 ; X64-NEXT: andb $15, %dil
1327 ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1328 ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1329 ; X64-NEXT: kmovw %eax, %k1
1330 ; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
1331 ; X64-NEXT: retq
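
vshufps takes its first two result lanes from the first source and the last two from the second, two immediate bits per lane; a[0],a[1],b[0],b[0] packs the selectors 0,1,0,0 into imm 0x04. A sketch of the masked 128-bit form (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vshufps {{.*}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] */
  __m128 demo_mask_shuffle_ps(__m128 src, __mmask8 k, __m128 a, __m128 b) {
    return _mm_mask_shuffle_ps(src, k, a, b, 0x04);
  }
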
1342 ; X32-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1343 ; X32-NEXT: retl
1347 ; X64-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1348 ; X64-NEXT: retq
1356 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1357 ; X32-NEXT: kmovw %eax, %k1
1358 ; X32-NEXT: vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
1359 ; X32-NEXT: retl
1363 ; X64-NEXT: kmovw %edi, %k1
1364 ; X64-NEXT: vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
1365 ; X64-NEXT: retq
1375 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1376 ; X32-NEXT: kmovw %eax, %k1
1377 ; X32-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1378 ; X32-NEXT: retl
1382 ; X64-NEXT: kmovw %edi, %k1
1383 ; X64-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1384 ; X64-NEXT: retq
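
The 256-bit vshufps applies the same 8-bit immediate to each 128-bit half, which is why the pattern repeats as [0,1],[0,0] in the low half and [4,5],[4,4] in the high half. A closing sketch (wrapper illustrative):

  #include <immintrin.h>

  /* Expected: vshufps {{.*}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4] */
  __m256 demo_maskz_shuffle_ps(__mmask8 k, __m256 a, __m256 b) {
    return _mm256_maskz_shuffle_ps(k, a, b, 0x04);
  }
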