Lines Matching full:next
8 ; CHECK-NEXT: kmovw %edi, %k0
9 ; CHECK-NEXT: knotw %k0, %k0
10 ; CHECK-NEXT: kmovw %k0, %eax
11 ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
12 ; CHECK-NEXT: retq
22 ; CHECK-NEXT: kmovw %edi, %k0
23 ; CHECK-NEXT: knotw %k0, %k0
24 ; CHECK-NEXT: kmovw %k0, %eax
25 ; CHECK-NEXT: retq
36 ; KNL-NEXT: kmovw %edi, %k0
37 ; KNL-NEXT: knotw %k0, %k0
38 ; KNL-NEXT: kmovw %k0, %eax
39 ; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
40 ; KNL-NEXT: retq
44 ; SKX-NEXT: kmovb %edi, %k0
45 ; SKX-NEXT: knotb %k0, %k0
46 ; SKX-NEXT: kmovb %k0, %eax
47 ; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
48 ; SKX-NEXT: retq
58 ; KNL-NEXT: kmovw %edi, %k0
59 ; KNL-NEXT: knotw %k0, %k0
60 ; KNL-NEXT: kmovw %k0, %eax
61 ; KNL-NEXT: retq
65 ; SKX-NEXT: kmovb %edi, %k0
66 ; SKX-NEXT: knotb %k0, %k0
67 ; SKX-NEXT: kmovb %k0, %eax
68 ; SKX-NEXT: retq
79 ; CHECK-NEXT: kmovw (%rdi), %k0
80 ; CHECK-NEXT: knotw %k0, %k0
81 ; CHECK-NEXT: kmovw %k0, (%rdi)
82 ; CHECK-NEXT: retq
94 ; KNL-NEXT: movzbl (%rdi), %eax
95 ; KNL-NEXT: kmovw %eax, %k0
96 ; KNL-NEXT: knotw %k0, %k0
97 ; KNL-NEXT: kmovw %k0, %eax
98 ; KNL-NEXT: movb %al, (%rdi)
99 ; KNL-NEXT: retq
103 ; SKX-NEXT: kmovb (%rdi), %k0
104 ; SKX-NEXT: knotb %k0, %k0
105 ; SKX-NEXT: kmovb %k0, (%rdi)
106 ; SKX-NEXT: retq
118 ; CHECK-NEXT: movl %edi, %eax
119 ; CHECK-NEXT: xorl %esi, %eax
120 ; CHECK-NEXT: andl %esi, %edi
121 ; CHECK-NEXT: orl %eax, %edi
122 ; CHECK-NEXT: movl %edi, %eax
123 ; CHECK-NEXT: retq
136 ; CHECK-NEXT: kmovw (%rdi), %k0
137 ; CHECK-NEXT: kmovw (%rsi), %k1
138 ; CHECK-NEXT: kandw %k1, %k0, %k2
139 ; CHECK-NEXT: kxorw %k1, %k0, %k0
140 ; CHECK-NEXT: korw %k0, %k2, %k0
141 ; CHECK-NEXT: kmovw %k0, %eax
142 ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
143 ; CHECK-NEXT: retq
156 ; KNL-NEXT: kmovw %edi, %k0
157 ; KNL-NEXT: kshiftrw $8, %k0, %k0
158 ; KNL-NEXT: kmovw %k0, %eax
159 ; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
160 ; KNL-NEXT: retq
164 ; SKX-NEXT: kmovw %edi, %k0
165 ; SKX-NEXT: kshiftrw $8, %k0, %k0
166 ; SKX-NEXT: kmovb %k0, %eax
167 ; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
168 ; SKX-NEXT: retq
178 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
179 ; CHECK-NEXT: kshiftlw $10, %k0, %k0
180 ; CHECK-NEXT: kshiftrw $15, %k0, %k0
181 ; CHECK-NEXT: kmovw %k0, %eax
182 ; CHECK-NEXT: retq
192 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
193 ; CHECK-NEXT: kshiftlw $10, %k0, %k0
194 ; CHECK-NEXT: kshiftrw $15, %k0, %k0
195 ; CHECK-NEXT: kmovw %k0, %eax
196 ; CHECK-NEXT: retq
206 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
207 ; CHECK-NEXT: kshiftlw $10, %k0, %k0
208 ; CHECK-NEXT: kshiftrw $15, %k0, %k0
209 ; CHECK-NEXT: kmovw %k0, %eax
210 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
211 ; CHECK-NEXT: retq
221 ; KNL-NEXT: kxnorw %k0, %k0, %k0
222 ; KNL-NEXT: kmovw %k0, %eax
223 ; KNL-NEXT: movb %al, (%rdi)
224 ; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
225 ; KNL-NEXT: movb $-2, %al
226 ; KNL-NEXT: retq
230 ; SKX-NEXT: kxnorw %k0, %k0, %k0
231 ; SKX-NEXT: kmovb %k0, (%rdi)
232 ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
233 ; SKX-NEXT: movb $-2, %al
234 ; SKX-NEXT: retq
248 ; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
249 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
250 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
251 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
252 ; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
253 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
254 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
255 ; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
256 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
257 ; KNL-NEXT: retq
261 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0
262 ; SKX-NEXT: knotw %k0, %k1
263 ; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
264 ; SKX-NEXT: vpmovm2d %k0, %xmm0
265 ; SKX-NEXT: retq
276 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
277 ; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
278 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
279 ; KNL-NEXT: retq
283 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
284 ; SKX-NEXT: knotw %k0, %k1
285 ; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
286 ; SKX-NEXT: vpmovm2q %k0, %xmm0
287 ; SKX-NEXT: retq
309 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
310 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
311 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
312 ; KNL-NEXT: movb $85, %al
313 ; KNL-NEXT: kmovw %eax, %k1
314 ; KNL-NEXT: korw %k1, %k0, %k0
315 ; KNL-NEXT: kmovw %k0, %eax
316 ; KNL-NEXT: testb %al, %al
317 ; KNL-NEXT: retq
321 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
322 ; SKX-NEXT: vpmovw2m %xmm0, %k0
323 ; SKX-NEXT: movb $85, %al
324 ; SKX-NEXT: kmovb %eax, %k1
325 ; SKX-NEXT: korb %k1, %k0, %k0
326 ; SKX-NEXT: ktestb %k0, %k0
327 ; SKX-NEXT: retq
343 ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
344 ; KNL-NEXT: cmpl %esi, %edi
345 ; KNL-NEXT: jg LBB17_1
346 ; KNL-NEXT: ## BB#2:
347 ; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
348 ; KNL-NEXT: jmp LBB17_3
349 ; KNL-NEXT: LBB17_1:
350 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
351 ; KNL-NEXT: LBB17_3:
352 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
353 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
354 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
355 ; KNL-NEXT: retq
359 ; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
360 ; SKX-NEXT: cmpl %esi, %edi
361 ; SKX-NEXT: jg LBB17_1
362 ; SKX-NEXT: ## BB#2:
363 ; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
364 ; SKX-NEXT: vpmovm2b %k0, %xmm0
365 ; SKX-NEXT: retq
366 ; SKX-NEXT: LBB17_1:
367 ; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
368 ; SKX-NEXT: vpmovm2b %k0, %xmm0
369 ; SKX-NEXT: retq
380 ; KNL-NEXT: cmpl %esi, %edi
381 ; KNL-NEXT: jg LBB18_1
382 ; KNL-NEXT: ## BB#2:
383 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
384 ; KNL-NEXT: jmp LBB18_3
385 ; KNL-NEXT: LBB18_1:
386 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
387 ; KNL-NEXT: LBB18_3:
388 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
389 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
390 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
391 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
392 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
393 ; KNL-NEXT: retq
397 ; SKX-NEXT: cmpl %esi, %edi
398 ; SKX-NEXT: jg LBB18_1
399 ; SKX-NEXT: ## BB#2:
400 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
401 ; SKX-NEXT: jmp LBB18_3
402 ; SKX-NEXT: LBB18_1:
403 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
404 ; SKX-NEXT: LBB18_3:
405 ; SKX-NEXT: vpmovb2m %xmm0, %k0
406 ; SKX-NEXT: vpmovm2b %k0, %xmm0
407 ; SKX-NEXT: retq
420 ; KNL-NEXT: cmpl %esi, %edi
421 ; KNL-NEXT: jg LBB20_2
422 ; KNL-NEXT: ## BB#1:
423 ; KNL-NEXT: vmovaps %zmm1, %zmm0
424 ; KNL-NEXT: LBB20_2:
425 ; KNL-NEXT: retq
429 ; SKX-NEXT: cmpl %esi, %edi
430 ; SKX-NEXT: jg LBB20_1
431 ; SKX-NEXT: ## BB#2:
432 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
433 ; SKX-NEXT: jmp LBB20_3
434 ; SKX-NEXT: LBB20_1:
435 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
436 ; SKX-NEXT: LBB20_3:
437 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
438 ; SKX-NEXT: vpmovm2d %k0, %xmm0
439 ; SKX-NEXT: retq
448 ; CHECK-NEXT: movl %edi, %eax
449 ; CHECK-NEXT: retq
459 ; CHECK-NEXT: movl %esi, %eax
460 ; CHECK-NEXT: retq
475 ; KNL-NEXT: cmpl %esi, %edi
476 ; KNL-NEXT: movw $21845, %ax ## imm = 0x5555
477 ; KNL-NEXT: movw $1, %cx
478 ; KNL-NEXT: cmovgw %ax, %cx
479 ; KNL-NEXT: kmovw %ecx, %k1
480 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
481 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
482 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
483 ; KNL-NEXT: retq
487 ; SKX-NEXT: cmpl %esi, %edi
488 ; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
489 ; SKX-NEXT: movw $1, %cx
490 ; SKX-NEXT: cmovgw %ax, %cx
491 ; SKX-NEXT: kmovw %ecx, %k0
492 ; SKX-NEXT: vpmovm2b %k0, %xmm0
493 ; SKX-NEXT: retq
505 ; KNL-NEXT: pushq %rbp
506 ; KNL-NEXT: Ltmp0:
507 ; KNL-NEXT: .cfi_def_cfa_offset 16
508 ; KNL-NEXT: Ltmp1:
509 ; KNL-NEXT: .cfi_offset %rbp, -16
510 ; KNL-NEXT: movq %rsp, %rbp
511 ; KNL-NEXT: Ltmp2:
512 ; KNL-NEXT: .cfi_def_cfa_register %rbp
513 ; KNL-NEXT: andq $-32, %rsp
514 ; KNL-NEXT: subq $64, %rsp
515 ; KNL-NEXT: movl %edi, (%rsp)
516 ; KNL-NEXT: shrq $32, %rdi
517 ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
518 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
519 ; KNL-NEXT: kmovw (%rsp), %k1
520 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
521 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
522 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
523 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
524 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
525 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm2
526 ; KNL-NEXT: movl $1, %eax
527 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
528 ; KNL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
529 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
530 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} {z}
531 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
532 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
533 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
534 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
535 ; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
536 ; KNL-NEXT: vpsllw $7, %ymm2, %ymm0
537 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
538 ; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
539 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
540 ; KNL-NEXT: movq %rbp, %rsp
541 ; KNL-NEXT: popq %rbp
542 ; KNL-NEXT: retq
546 ; SKX-NEXT: kmovq %rdi, %k0
547 ; SKX-NEXT: kxnorw %k0, %k0, %k1
548 ; SKX-NEXT: kshiftrw $15, %k1, %k1
549 ; SKX-NEXT: kshiftlq $5, %k1, %k1
550 ; SKX-NEXT: korq %k1, %k0, %k0
551 ; SKX-NEXT: vpmovm2b %k0, %zmm0
552 ; SKX-NEXT: retq
563 ; KNL-NEXT: pushq %rbp
564 ; KNL-NEXT: Ltmp3:
565 ; KNL-NEXT: .cfi_def_cfa_offset 16
566 ; KNL-NEXT: Ltmp4:
567 ; KNL-NEXT: .cfi_offset %rbp, -16
568 ; KNL-NEXT: movq %rsp, %rbp
569 ; KNL-NEXT: Ltmp5:
570 ; KNL-NEXT: .cfi_def_cfa_register %rbp
571 ; KNL-NEXT: andq $-32, %rsp
572 ; KNL-NEXT: subq $64, %rsp
573 ; KNL-NEXT: movl %edi, (%rsp)
574 ; KNL-NEXT: shrq $32, %rdi
575 ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
576 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
577 ; KNL-NEXT: kmovw (%rsp), %k1
578 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
579 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
580 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
581 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
582 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
583 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
584 ; KNL-NEXT: xorl %eax, %eax
585 ; KNL-NEXT: cmpl %edx, %esi
586 ; KNL-NEXT: setg %al
587 ; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
588 ; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
589 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
590 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
591 ; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
592 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
593 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
594 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
595 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
596 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
597 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
598 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
599 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
600 ; KNL-NEXT: movq %rbp, %rsp
601 ; KNL-NEXT: popq %rbp
602 ; KNL-NEXT: retq
606 ; SKX-NEXT: kmovq %rdi, %k0
607 ; SKX-NEXT: cmpl %edx, %esi
608 ; SKX-NEXT: setg %al
609 ; SKX-NEXT: kmovw %eax, %k1
610 ; SKX-NEXT: kshiftlq $5, %k1, %k1
611 ; SKX-NEXT: korq %k1, %k0, %k0
612 ; SKX-NEXT: vpmovm2b %k0, %zmm0
613 ; SKX-NEXT: retq
624 ; KNL-NEXT: kmovw %edi, %k0
625 ; KNL-NEXT: kmovw %esi, %k1
626 ; KNL-NEXT: kshiftlw $7, %k1, %k2
627 ; KNL-NEXT: kshiftrw $15, %k2, %k2
628 ; KNL-NEXT: kshiftlw $6, %k1, %k1
629 ; KNL-NEXT: kshiftrw $15, %k1, %k1
630 ; KNL-NEXT: kshiftlw $6, %k1, %k1
631 ; KNL-NEXT: korw %k1, %k0, %k0
632 ; KNL-NEXT: kshiftlw $7, %k2, %k1
633 ; KNL-NEXT: korw %k1, %k0, %k1
634 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
635 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
636 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
637 ; KNL-NEXT: retq
641 ; SKX-NEXT: kmovb %edi, %k0
642 ; SKX-NEXT: kmovw %esi, %k1
643 ; SKX-NEXT: kshiftlw $6, %k1, %k2
644 ; SKX-NEXT: kshiftrw $15, %k2, %k2
645 ; SKX-NEXT: kshiftlw $7, %k1, %k1
646 ; SKX-NEXT: kshiftrw $15, %k1, %k1
647 ; SKX-NEXT: kshiftlb $7, %k1, %k1
648 ; SKX-NEXT: kshiftlb $6, %k2, %k2
649 ; SKX-NEXT: korb %k2, %k0, %k0
650 ; SKX-NEXT: korb %k1, %k0, %k0
651 ; SKX-NEXT: vpmovm2w %k0, %xmm0
652 ; SKX-NEXT: retq
664 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
665 ; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
666 ; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
667 ; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
668 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
669 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
670 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
671 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
672 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
673 ; KNL-NEXT: retq
677 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
678 ; SKX-NEXT: vpmovb2m %ymm1, %k1
679 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
680 ; SKX-NEXT: retq
688 ; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
689 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
690 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
691 ; KNL-NEXT: kmovw %k0, %eax
692 ; KNL-NEXT: movb %al, (%rdi)
693 ; KNL-NEXT: retq
697 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
698 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
699 ; SKX-NEXT: kmovb %k0, (%rdi)
700 ; SKX-NEXT: retq
708 ; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
709 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
710 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
711 ; KNL-NEXT: kmovw %k0, %eax
712 ; KNL-NEXT: movb %al, (%rdi)
713 ; KNL-NEXT: retq
717 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
718 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
719 ; SKX-NEXT: kmovb %k0, (%rdi)
720 ; SKX-NEXT: retq
728 ; KNL-NEXT: andl $1, %edi
729 ; KNL-NEXT: kmovw %edi, %k0
730 ; KNL-NEXT: kxnorw %k0, %k0, %k1
731 ; KNL-NEXT: kshiftrw $15, %k1, %k1
732 ; KNL-NEXT: kxorw %k1, %k0, %k0
733 ; KNL-NEXT: kmovw %k0, %eax
734 ; KNL-NEXT: movb %al, (%rsi)
735 ; KNL-NEXT: retq
739 ; SKX-NEXT: andl $1, %edi
740 ; SKX-NEXT: kmovw %edi, %k0
741 ; SKX-NEXT: kxnorw %k0, %k0, %k1
742 ; SKX-NEXT: kshiftrw $15, %k1, %k1
743 ; SKX-NEXT: kxorw %k1, %k0, %k0
744 ; SKX-NEXT: kmovb %k0, (%rsi)
745 ; SKX-NEXT: retq
754 ; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
755 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
756 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
757 ; KNL-NEXT: kmovw %k0, %eax
758 ; KNL-NEXT: movb %al, (%rdi)
759 ; KNL-NEXT: retq
763 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
764 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
765 ; SKX-NEXT: knotw %k0, %k0
766 ; SKX-NEXT: kmovb %k0, (%rdi)
767 ; SKX-NEXT: retq
776 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
777 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
778 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
779 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
780 ; KNL-NEXT: kmovw %k0, %eax
781 ; KNL-NEXT: movb %al, (%rdi)
782 ; KNL-NEXT: retq
786 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
787 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
788 ; SKX-NEXT: knotw %k0, %k0
789 ; SKX-NEXT: kmovb %k0, (%rdi)
790 ; SKX-NEXT: retq
799 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
800 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
801 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
802 ; KNL-NEXT: knotw %k0, %k0
803 ; KNL-NEXT: kmovw %k0, %eax
804 ; KNL-NEXT: movb %al, (%rdi)
805 ; KNL-NEXT: retq
809 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
810 ; SKX-NEXT: vpmovw2m %xmm0, %k0
811 ; SKX-NEXT: knotb %k0, %k0
812 ; SKX-NEXT: kmovb %k0, (%rdi)
813 ; SKX-NEXT: retq
822 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
823 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
824 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
825 ; KNL-NEXT: knotw %k0, %k0
826 ; KNL-NEXT: kmovw %k0, (%rdi)
827 ; KNL-NEXT: retq
831 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
832 ; SKX-NEXT: vpmovb2m %xmm0, %k0
833 ; SKX-NEXT: knotw %k0, %k0
834 ; SKX-NEXT: kmovw %k0, (%rdi)
835 ; SKX-NEXT: retq
857 ; KNL-NEXT: movzbl {{.*}}(%rip), %edi
858 ; KNL-NEXT: movl %edi, %eax
859 ; KNL-NEXT: andl $1, %eax
860 ; KNL-NEXT: kmovw %eax, %k0
861 ; KNL-NEXT: kxnorw %k0, %k0, %k1
862 ; KNL-NEXT: kshiftrw $15, %k1, %k1
863 ; KNL-NEXT: kxorw %k1, %k0, %k0
864 ; KNL-NEXT: kmovw %k0, %eax
865 ; KNL-NEXT: movb %al, {{.*}}(%rip)
866 ; KNL-NEXT: xorl $1, %edi
867 ; KNL-NEXT: jmp _f2 ## TAILCALL
871 ; SKX-NEXT: movzbl {{.*}}(%rip), %edi
872 ; SKX-NEXT: movl %edi, %eax
873 ; SKX-NEXT: andl $1, %eax
874 ; SKX-NEXT: kmovw %eax, %k0
875 ; SKX-NEXT: kxnorw %k0, %k0, %k1
876 ; SKX-NEXT: kshiftrw $15, %k1, %k1
877 ; SKX-NEXT: kxorw %k1, %k0, %k0
878 ; SKX-NEXT: kmovb %k0, {{.*}}(%rip)
879 ; SKX-NEXT: xorl $1, %edi
880 ; SKX-NEXT: jmp _f2 ## TAILCALL
895 ; CHECK-NEXT: andl $1, %edi
896 ; CHECK-NEXT: movb %dil, (%rsi)
897 ; CHECK-NEXT: retq
906 ; CHECK-NEXT: andl $1, %edi
907 ; CHECK-NEXT: movb %dil, (%rsi)
908 ; CHECK-NEXT: retq
917 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
918 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
919 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
920 ; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
921 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
922 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
923 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
924 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
925 ; KNL-NEXT: retq
929 ; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
930 ; SKX-NEXT: kmovd %eax, %k1
931 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
932 ; SKX-NEXT: retq
940 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
941 ; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
942 ; KNL-NEXT: retq
946 ; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
947 ; SKX-NEXT: kmovq %rax, %k1
948 ; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
949 ; SKX-NEXT: retq
957 ; KNL-NEXT: vmovupd (%rdi), %zmm1
958 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
959 ; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
960 ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
961 ; KNL-NEXT: kmovw %k0, %eax
962 ; KNL-NEXT: testb %al, %al
963 ; KNL-NEXT: je LBB41_2
964 ; KNL-NEXT: ## BB#1: ## %L1
965 ; KNL-NEXT: vmovapd %zmm0, (%rdi)
966 ; KNL-NEXT: retq
967 ; KNL-NEXT: LBB41_2: ## %L2
968 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
969 ; KNL-NEXT: retq
973 ; SKX-NEXT: vmovupd (%rdi), %zmm1
974 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
975 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
976 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
977 ; SKX-NEXT: ktestb %k0, %k0
978 ; SKX-NEXT: je LBB41_2
979 ; SKX-NEXT: ## BB#1: ## %L1
980 ; SKX-NEXT: vmovapd %zmm0, (%rdi)
981 ; SKX-NEXT: retq
982 ; SKX-NEXT: LBB41_2: ## %L2
983 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
984 ; SKX-NEXT: retq
1016 ; KNL-NEXT: pushq %rbp
1017 ; KNL-NEXT: Ltmp6:
1018 ; KNL-NEXT: .cfi_def_cfa_offset 16
1019 ; KNL-NEXT: Ltmp7:
1020 ; KNL-NEXT: .cfi_offset %rbp, -16
1021 ; KNL-NEXT: movq %rsp, %rbp
1022 ; KNL-NEXT: Ltmp8:
1023 ; KNL-NEXT: .cfi_def_cfa_register %rbp
1024 ; KNL-NEXT: andq $-32, %rsp
1025 ; KNL-NEXT: subq $32, %rsp
1026 ; KNL-NEXT: vmovups (%rdi), %zmm2
1027 ; KNL-NEXT: vmovups 64(%rdi), %zmm3
1028 ; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
1029 ; KNL-NEXT: kshiftlw $14, %k1, %k0
1030 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1031 ; KNL-NEXT: kmovw %k0, %eax
1032 ; KNL-NEXT: kshiftlw $15, %k1, %k0
1033 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1034 ; KNL-NEXT: kmovw %k0, %ecx
1035 ; KNL-NEXT: vmovd %ecx, %xmm3
1036 ; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1037 ; KNL-NEXT: kshiftlw $13, %k1, %k0
1038 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1039 ; KNL-NEXT: kmovw %k0, %eax
1040 ; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1041 ; KNL-NEXT: kshiftlw $12, %k1, %k0
1042 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1043 ; KNL-NEXT: kmovw %k0, %eax
1044 ; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1045 ; KNL-NEXT: kshiftlw $11, %k1, %k0
1046 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1047 ; KNL-NEXT: kmovw %k0, %eax
1048 ; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1049 ; KNL-NEXT: kshiftlw $10, %k1, %k0
1050 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1051 ; KNL-NEXT: kmovw %k0, %eax
1052 ; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1053 ; KNL-NEXT: kshiftlw $9, %k1, %k0
1054 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1055 ; KNL-NEXT: kmovw %k0, %eax
1056 ; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1057 ; KNL-NEXT: kshiftlw $8, %k1, %k0
1058 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1059 ; KNL-NEXT: kmovw %k0, %eax
1060 ; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1061 ; KNL-NEXT: kshiftlw $7, %k1, %k0
1062 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1063 ; KNL-NEXT: kmovw %k0, %eax
1064 ; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1065 ; KNL-NEXT: kshiftlw $6, %k1, %k0
1066 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1067 ; KNL-NEXT: kmovw %k0, %eax
1068 ; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1069 ; KNL-NEXT: kshiftlw $5, %k1, %k0
1070 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1071 ; KNL-NEXT: kmovw %k0, %eax
1072 ; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1073 ; KNL-NEXT: kshiftlw $4, %k1, %k0
1074 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1075 ; KNL-NEXT: kmovw %k0, %eax
1076 ; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1077 ; KNL-NEXT: kshiftlw $3, %k1, %k0
1078 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1079 ; KNL-NEXT: kmovw %k0, %eax
1080 ; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1081 ; KNL-NEXT: kshiftlw $2, %k1, %k0
1082 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1083 ; KNL-NEXT: kmovw %k0, %eax
1084 ; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1085 ; KNL-NEXT: kshiftlw $1, %k1, %k0
1086 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1087 ; KNL-NEXT: kmovw %k0, %eax
1088 ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1089 ; KNL-NEXT: kshiftlw $0, %k1, %k0
1090 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1091 ; KNL-NEXT: kmovw %k0, %eax
1092 ; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
1093 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
1094 ; KNL-NEXT: kshiftlw $14, %k2, %k0
1095 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1096 ; KNL-NEXT: kmovw %k0, %eax
1097 ; KNL-NEXT: kshiftlw $15, %k2, %k0
1098 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1099 ; KNL-NEXT: kmovw %k0, %ecx
1100 ; KNL-NEXT: vmovd %ecx, %xmm2
1101 ; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1102 ; KNL-NEXT: kshiftlw $13, %k2, %k0
1103 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1104 ; KNL-NEXT: kmovw %k0, %eax
1105 ; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1106 ; KNL-NEXT: kshiftlw $12, %k2, %k0
1107 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1108 ; KNL-NEXT: kmovw %k0, %eax
1109 ; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
1110 ; KNL-NEXT: kshiftlw $11, %k2, %k0
1111 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1112 ; KNL-NEXT: kmovw %k0, %eax
1113 ; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
1114 ; KNL-NEXT: kshiftlw $10, %k2, %k0
1115 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1116 ; KNL-NEXT: kmovw %k0, %eax
1117 ; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
1118 ; KNL-NEXT: kshiftlw $9, %k2, %k0
1119 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1120 ; KNL-NEXT: kmovw %k0, %eax
1121 ; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
1122 ; KNL-NEXT: kshiftlw $8, %k2, %k0
1123 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1124 ; KNL-NEXT: kmovw %k0, %eax
1125 ; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
1126 ; KNL-NEXT: kshiftlw $7, %k2, %k0
1127 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1128 ; KNL-NEXT: kmovw %k0, %eax
1129 ; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
1130 ; KNL-NEXT: kshiftlw $6, %k2, %k0
1131 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1132 ; KNL-NEXT: kmovw %k0, %eax
1133 ; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
1134 ; KNL-NEXT: kshiftlw $5, %k2, %k0
1135 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1136 ; KNL-NEXT: kmovw %k0, %eax
1137 ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1138 ; KNL-NEXT: kshiftlw $4, %k2, %k0
1139 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1140 ; KNL-NEXT: kmovw %k0, %eax
1141 ; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
1142 ; KNL-NEXT: kshiftlw $3, %k2, %k0
1143 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1144 ; KNL-NEXT: kmovw %k0, %eax
1145 ; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
1146 ; KNL-NEXT: kshiftlw $2, %k2, %k0
1147 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1148 ; KNL-NEXT: kmovw %k0, %eax
1149 ; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
1150 ; KNL-NEXT: kshiftlw $1, %k2, %k0
1151 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1152 ; KNL-NEXT: kmovw %k0, %eax
1153 ; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
1154 ; KNL-NEXT: kshiftlw $0, %k2, %k0
1155 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1156 ; KNL-NEXT: kmovw %k0, %eax
1157 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
1158 ; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1159 ; KNL-NEXT: vpsllw $7, %ymm2, %ymm2
1160 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
1161 ; KNL-NEXT: vpxor %ymm3, %ymm3, %ymm3
1162 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
1163 ; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
1164 ; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
1165 ; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
1166 ; KNL-NEXT: kshiftlw $14, %k0, %k1
1167 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1168 ; KNL-NEXT: kmovw %k1, %eax
1169 ; KNL-NEXT: kshiftlw $15, %k0, %k1
1170 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1171 ; KNL-NEXT: kmovw %k1, %ecx
1172 ; KNL-NEXT: vmovd %ecx, %xmm4
1173 ; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
1174 ; KNL-NEXT: kshiftlw $13, %k0, %k1
1175 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1176 ; KNL-NEXT: kmovw %k1, %eax
1177 ; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
1178 ; KNL-NEXT: kshiftlw $12, %k0, %k1
1179 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1180 ; KNL-NEXT: kmovw %k1, %eax
1181 ; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
1182 ; KNL-NEXT: kshiftlw $11, %k0, %k1
1183 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1184 ; KNL-NEXT: kmovw %k1, %eax
1185 ; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
1186 ; KNL-NEXT: kshiftlw $10, %k0, %k1
1187 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1188 ; KNL-NEXT: kmovw %k1, %eax
1189 ; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
1190 ; KNL-NEXT: kshiftlw $9, %k0, %k1
1191 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1192 ; KNL-NEXT: kmovw %k1, %eax
1193 ; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
1194 ; KNL-NEXT: kshiftlw $8, %k0, %k1
1195 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1196 ; KNL-NEXT: kmovw %k1, %eax
1197 ; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
1198 ; KNL-NEXT: kshiftlw $7, %k0, %k1
1199 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1200 ; KNL-NEXT: kmovw %k1, %eax
1201 ; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
1202 ; KNL-NEXT: kshiftlw $6, %k0, %k1
1203 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1204 ; KNL-NEXT: kmovw %k1, %eax
1205 ; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
1206 ; KNL-NEXT: kshiftlw $5, %k0, %k1
1207 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1208 ; KNL-NEXT: kmovw %k1, %eax
1209 ; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
1210 ; KNL-NEXT: kshiftlw $4, %k0, %k1
1211 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1212 ; KNL-NEXT: kmovw %k1, %eax
1213 ; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
1214 ; KNL-NEXT: kshiftlw $3, %k0, %k1
1215 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1216 ; KNL-NEXT: kmovw %k1, %eax
1217 ; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
1218 ; KNL-NEXT: kshiftlw $2, %k0, %k1
1219 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1220 ; KNL-NEXT: kmovw %k1, %eax
1221 ; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
1222 ; KNL-NEXT: kshiftlw $1, %k0, %k1
1223 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1224 ; KNL-NEXT: kmovw %k1, %eax
1225 ; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
1226 ; KNL-NEXT: kshiftlw $0, %k0, %k0
1227 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1228 ; KNL-NEXT: kmovw %k0, %eax
1229 ; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
1230 ; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
1231 ; KNL-NEXT: kshiftlw $14, %k0, %k1
1232 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1233 ; KNL-NEXT: kmovw %k1, %eax
1234 ; KNL-NEXT: kshiftlw $15, %k0, %k1
1235 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1236 ; KNL-NEXT: kmovw %k1, %ecx
1237 ; KNL-NEXT: vmovd %ecx, %xmm3
1238 ; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1239 ; KNL-NEXT: kshiftlw $13, %k0, %k1
1240 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1241 ; KNL-NEXT: kmovw %k1, %eax
1242 ; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1243 ; KNL-NEXT: kshiftlw $12, %k0, %k1
1244 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1245 ; KNL-NEXT: kmovw %k1, %eax
1246 ; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1247 ; KNL-NEXT: kshiftlw $11, %k0, %k1
1248 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1249 ; KNL-NEXT: kmovw %k1, %eax
1250 ; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1251 ; KNL-NEXT: kshiftlw $10, %k0, %k1
1252 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1253 ; KNL-NEXT: kmovw %k1, %eax
1254 ; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1255 ; KNL-NEXT: kshiftlw $9, %k0, %k1
1256 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1257 ; KNL-NEXT: kmovw %k1, %eax
1258 ; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1259 ; KNL-NEXT: kshiftlw $8, %k0, %k1
1260 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1261 ; KNL-NEXT: kmovw %k1, %eax
1262 ; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1263 ; KNL-NEXT: kshiftlw $7, %k0, %k1
1264 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1265 ; KNL-NEXT: kmovw %k1, %eax
1266 ; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1267 ; KNL-NEXT: kshiftlw $6, %k0, %k1
1268 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1269 ; KNL-NEXT: kmovw %k1, %eax
1270 ; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1271 ; KNL-NEXT: kshiftlw $5, %k0, %k1
1272 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1273 ; KNL-NEXT: kmovw %k1, %eax
1274 ; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1275 ; KNL-NEXT: kshiftlw $4, %k0, %k1
1276 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1277 ; KNL-NEXT: kmovw %k1, %eax
1278 ; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1279 ; KNL-NEXT: kshiftlw $3, %k0, %k1
1280 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1281 ; KNL-NEXT: kmovw %k1, %eax
1282 ; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1283 ; KNL-NEXT: kshiftlw $2, %k0, %k1
1284 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1285 ; KNL-NEXT: kmovw %k1, %eax
1286 ; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1287 ; KNL-NEXT: kshiftlw $1, %k0, %k1
1288 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1289 ; KNL-NEXT: kmovw %k1, %eax
1290 ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1291 ; KNL-NEXT: kshiftlw $0, %k0, %k0
1292 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1293 ; KNL-NEXT: kmovw %k0, %eax
1294 ; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
1295 ; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
1296 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
1297 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
1298 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
1299 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3
1300 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
1301 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1302 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1303 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1304 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1305 ; KNL-NEXT: kmovw %k0, (%rsp)
1306 ; KNL-NEXT: cmpl $0, (%rsp)
1307 ; KNL-NEXT: je LBB42_2
1308 ; KNL-NEXT: ## BB#1: ## %L1
1309 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
1310 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
1311 ; KNL-NEXT: jmp LBB42_3
1312 ; KNL-NEXT: LBB42_2: ## %L2
1313 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
1314 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
1315 ; KNL-NEXT: LBB42_3: ## %End
1316 ; KNL-NEXT: movq %rbp, %rsp
1317 ; KNL-NEXT: popq %rbp
1318 ; KNL-NEXT: retq
1322 ; SKX-NEXT: vmovups 64(%rdi), %zmm2
1323 ; SKX-NEXT: vmovups (%rdi), %zmm3
1324 ; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1
1325 ; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2
1326 ; SKX-NEXT: kunpckwd %k1, %k2, %k0
1327 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
1328 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
1329 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
1330 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
1331 ; SKX-NEXT: kunpckwd %k1, %k2, %k1
1332 ; SKX-NEXT: kord %k1, %k0, %k0
1333 ; SKX-NEXT: ktestd %k0, %k0
1334 ; SKX-NEXT: je LBB42_2
1335 ; SKX-NEXT: ## BB#1: ## %L1
1336 ; SKX-NEXT: vmovaps %zmm0, (%rdi)
1337 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
1338 ; SKX-NEXT: retq
1339 ; SKX-NEXT: LBB42_2: ## %L2
1340 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
1341 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
1342 ; SKX-NEXT: retq
1373 ; KNL-NEXT: movzbl (%rdi), %eax
1374 ; KNL-NEXT: kmovw %eax, %k1
1375 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
1376 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1377 ; KNL-NEXT: retq
1381 ; SKX-NEXT: kmovb (%rdi), %k0
1382 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1383 ; SKX-NEXT: retq
1392 ; KNL-NEXT: kmovw (%rdi), %k1
1393 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
1394 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
1395 ; KNL-NEXT: retq
1399 ; SKX-NEXT: kmovw (%rdi), %k0
1400 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1401 ; SKX-NEXT: retq
1410 ; KNL-NEXT: movzbl (%rdi), %eax
1411 ; KNL-NEXT: kmovw %eax, %k1
1412 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
1413 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1414 ; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1415 ; KNL-NEXT: retq
1419 ; SKX-NEXT: kmovb (%rdi), %k0
1420 ; SKX-NEXT: vpmovm2q %k0, %xmm0
1421 ; SKX-NEXT: retq
1430 ; KNL-NEXT: movzbl (%rdi), %eax
1431 ; KNL-NEXT: kmovw %eax, %k1
1432 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
1433 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1434 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
1435 ; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1436 ; KNL-NEXT: retq
1440 ; SKX-NEXT: kmovb (%rdi), %k0
1441 ; SKX-NEXT: vpmovm2d %k0, %xmm0
1442 ; SKX-NEXT: retq
1451 ; KNL-NEXT: kmovw (%rdi), %k1
1452 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1453 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
1454 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1455 ; KNL-NEXT: kmovw 2(%rdi), %k1
1456 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1457 ; KNL-NEXT: vpmovdw %zmm1, %ymm1
1458 ; KNL-NEXT: retq
1462 ; SKX-NEXT: kmovd (%rdi), %k0
1463 ; SKX-NEXT: vpmovm2w %k0, %zmm0
1464 ; SKX-NEXT: retq
1473 ; KNL-NEXT: kmovw (%rdi), %k1
1474 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1475 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
1476 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1477 ; KNL-NEXT: kmovw 2(%rdi), %k1
1478 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
1479 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1480 ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1481 ; KNL-NEXT: kmovw 4(%rdi), %k1
1482 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
1483 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1484 ; KNL-NEXT: kmovw 6(%rdi), %k1
1485 ; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1486 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1487 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
1488 ; KNL-NEXT: retq
1492 ; SKX-NEXT: kmovq (%rdi), %k0
1493 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1494 ; SKX-NEXT: retq
1503 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1504 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1505 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1506 ; KNL-NEXT: kmovw %k0, %eax
1507 ; KNL-NEXT: movb %al, (%rdi)
1508 ; KNL-NEXT: retq
1512 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1513 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1514 ; SKX-NEXT: kmovb %k0, (%rdi)
1515 ; SKX-NEXT: retq
1523 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1524 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1525 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1526 ; KNL-NEXT: kmovw %k0, %eax
1527 ; KNL-NEXT: movb %al, (%rdi)
1528 ; KNL-NEXT: retq
1532 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1533 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1534 ; SKX-NEXT: kmovb %k0, (%rdi)
1535 ; SKX-NEXT: retq
1544 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1545 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1546 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1547 ; KNL-NEXT: kmovw %k0, (%rdi)
1548 ; KNL-NEXT: retq
1552 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1553 ; SKX-NEXT: vpmovb2m %xmm0, %k0
1554 ; SKX-NEXT: kmovw %k0, (%rdi)
1555 ; SKX-NEXT: retq
1563 ; KNL-NEXT: vextractf128 $1, %ymm0, %xmm1
1564 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1565 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1566 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1567 ; KNL-NEXT: kmovw %k0, 2(%rdi)
1568 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1569 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1570 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1571 ; KNL-NEXT: kmovw %k0, (%rdi)
1572 ; KNL-NEXT: retq
1576 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
1577 ; SKX-NEXT: vpmovb2m %ymm0, %k0
1578 ; SKX-NEXT: kmovd %k0, (%rdi)
1579 ; SKX-NEXT: retq
1587 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
1588 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1589 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
1590 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1591 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1592 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1593 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1594 ; KNL-NEXT: kmovw %k0, 2(%rdi)
1595 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1596 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1597 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1598 ; KNL-NEXT: kmovw %k0, (%rdi)
1599 ; KNL-NEXT: retq
1603 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
1604 ; SKX-NEXT: vpmovw2m %zmm0, %k0
1605 ; SKX-NEXT: kmovd %k0, (%rdi)
1606 ; SKX-NEXT: retq
1617 ; KNL-NEXT: pushq %rbp
1618 ; KNL-NEXT: Ltmp9:
1619 ; KNL-NEXT: .cfi_def_cfa_offset 16
1620 ; KNL-NEXT: pushq %r15
1621 ; KNL-NEXT: Ltmp10:
1622 ; KNL-NEXT: .cfi_def_cfa_offset 24
1623 ; KNL-NEXT: pushq %r14
1624 ; KNL-NEXT: Ltmp11:
1625 ; KNL-NEXT: .cfi_def_cfa_offset 32
1626 ; KNL-NEXT: pushq %r13
1627 ; KNL-NEXT: Ltmp12:
1628 ; KNL-NEXT: .cfi_def_cfa_offset 40
1629 ; KNL-NEXT: pushq %r12
1630 ; KNL-NEXT: Ltmp13:
1631 ; KNL-NEXT: .cfi_def_cfa_offset 48
1632 ; KNL-NEXT: pushq %rbx
1633 ; KNL-NEXT: Ltmp14:
1634 ; KNL-NEXT: .cfi_def_cfa_offset 56
1635 ; KNL-NEXT: Ltmp15:
1636 ; KNL-NEXT: .cfi_offset %rbx, -56
1637 ; KNL-NEXT: Ltmp16:
1638 ; KNL-NEXT: .cfi_offset %r12, -48
1639 ; KNL-NEXT: Ltmp17:
1640 ; KNL-NEXT: .cfi_offset %r13, -40
1641 ; KNL-NEXT: Ltmp18:
1642 ; KNL-NEXT: .cfi_offset %r14, -32
1643 ; KNL-NEXT: Ltmp19:
1644 ; KNL-NEXT: .cfi_offset %r15, -24
1645 ; KNL-NEXT: Ltmp20:
1646 ; KNL-NEXT: .cfi_offset %rbp, -16
1647 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1648 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1649 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1650 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1651 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1652 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1653 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
1654 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3
1655 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
1656 ; KNL-NEXT: kshiftlw $14, %k0, %k1
1657 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1658 ; KNL-NEXT: kmovw %k1, %r8d
1659 ; KNL-NEXT: kshiftlw $15, %k0, %k1
1660 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1661 ; KNL-NEXT: kmovw %k1, %r9d
1662 ; KNL-NEXT: kshiftlw $13, %k0, %k1
1663 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1664 ; KNL-NEXT: kmovw %k1, %r10d
1665 ; KNL-NEXT: kshiftlw $12, %k0, %k1
1666 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1667 ; KNL-NEXT: kmovw %k1, %r11d
1668 ; KNL-NEXT: kshiftlw $11, %k0, %k1
1669 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1670 ; KNL-NEXT: kmovw %k1, %r14d
1671 ; KNL-NEXT: kshiftlw $10, %k0, %k1
1672 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1673 ; KNL-NEXT: kmovw %k1, %r15d
1674 ; KNL-NEXT: kshiftlw $9, %k0, %k1
1675 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1676 ; KNL-NEXT: kmovw %k1, %r12d
1677 ; KNL-NEXT: kshiftlw $8, %k0, %k1
1678 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1679 ; KNL-NEXT: kmovw %k1, %r13d
1680 ; KNL-NEXT: kshiftlw $7, %k0, %k1
1681 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1682 ; KNL-NEXT: kmovw %k1, %ebx
1683 ; KNL-NEXT: kshiftlw $6, %k0, %k1
1684 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1685 ; KNL-NEXT: kmovw %k1, %ebp
1686 ; KNL-NEXT: kshiftlw $5, %k0, %k1
1687 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1688 ; KNL-NEXT: kmovw %k1, %eax
1689 ; KNL-NEXT: kshiftlw $4, %k0, %k1
1690 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1691 ; KNL-NEXT: kmovw %k1, %ecx
1692 ; KNL-NEXT: kshiftlw $3, %k0, %k1
1693 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1694 ; KNL-NEXT: kmovw %k1, %edx
1695 ; KNL-NEXT: kshiftlw $2, %k0, %k1
1696 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1697 ; KNL-NEXT: kmovw %k1, %esi
1698 ; KNL-NEXT: kshiftlw $1, %k0, %k1
1699 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1700 ; KNL-NEXT: vmovd %r9d, %xmm3
1701 ; KNL-NEXT: kmovw %k1, %r9d
1702 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
1703 ; KNL-NEXT: kshiftlw $0, %k0, %k0
1704 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1705 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
1706 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
1707 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
1708 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
1709 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
1710 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
1711 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
1712 ; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
1713 ; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
1714 ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1715 ; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
1716 ; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
1717 ; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
1718 ; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
1719 ; KNL-NEXT: kmovw %k0, %eax
1720 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
1721 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1722 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1723 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1724 ; KNL-NEXT: kmovw %k0, 6(%rdi)
1725 ; KNL-NEXT: kshiftlw $14, %k2, %k0
1726 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1727 ; KNL-NEXT: kmovw %k0, %r8d
1728 ; KNL-NEXT: kshiftlw $15, %k2, %k0
1729 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1730 ; KNL-NEXT: kmovw %k0, %r10d
1731 ; KNL-NEXT: kshiftlw $13, %k2, %k0
1732 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1733 ; KNL-NEXT: kmovw %k0, %r9d
1734 ; KNL-NEXT: kshiftlw $12, %k2, %k0
1735 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1736 ; KNL-NEXT: kmovw %k0, %r11d
1737 ; KNL-NEXT: kshiftlw $11, %k2, %k0
1738 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1739 ; KNL-NEXT: kmovw %k0, %r14d
1740 ; KNL-NEXT: kshiftlw $10, %k2, %k0
1741 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1742 ; KNL-NEXT: kmovw %k0, %r15d
1743 ; KNL-NEXT: kshiftlw $9, %k2, %k0
1744 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1745 ; KNL-NEXT: kmovw %k0, %r12d
1746 ; KNL-NEXT: kshiftlw $8, %k2, %k0
1747 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1748 ; KNL-NEXT: kmovw %k0, %r13d
1749 ; KNL-NEXT: kshiftlw $7, %k2, %k0
1750 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1751 ; KNL-NEXT: kmovw %k0, %edx
1752 ; KNL-NEXT: kshiftlw $6, %k2, %k0
1753 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1754 ; KNL-NEXT: kmovw %k0, %esi
1755 ; KNL-NEXT: kshiftlw $5, %k2, %k0
1756 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1757 ; KNL-NEXT: kmovw %k0, %ebp
1758 ; KNL-NEXT: kshiftlw $4, %k2, %k0
1759 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1760 ; KNL-NEXT: kmovw %k0, %ebx
1761 ; KNL-NEXT: kshiftlw $3, %k2, %k0
1762 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1763 ; KNL-NEXT: kmovw %k0, %eax
1764 ; KNL-NEXT: kshiftlw $2, %k2, %k0
1765 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1766 ; KNL-NEXT: kmovw %k0, %ecx
1767 ; KNL-NEXT: kshiftlw $1, %k2, %k0
1768 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1769 ; KNL-NEXT: vmovd %r10d, %xmm2
1770 ; KNL-NEXT: kmovw %k0, %r10d
1771 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1772 ; KNL-NEXT: kshiftlw $0, %k2, %k0
1773 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1774 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
1775 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
1776 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
1777 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
1778 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
1779 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
1780 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
1781 ; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
1782 ; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
1783 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
1784 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
1785 ; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1786 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
1787 ; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
1788 ; KNL-NEXT: kmovw %k0, %eax
1789 ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
1790 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1791 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1792 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1793 ; KNL-NEXT: kmovw %k0, 4(%rdi)
1794 ; KNL-NEXT: kshiftlw $14, %k1, %k0
1795 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1796 ; KNL-NEXT: kmovw %k0, %r8d
1797 ; KNL-NEXT: kshiftlw $15, %k1, %k0
1798 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1799 ; KNL-NEXT: kmovw %k0, %r10d
1800 ; KNL-NEXT: kshiftlw $13, %k1, %k0
1801 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1802 ; KNL-NEXT: kmovw %k0, %r9d
1803 ; KNL-NEXT: kshiftlw $12, %k1, %k0
1804 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1805 ; KNL-NEXT: kmovw %k0, %r11d
1806 ; KNL-NEXT: kshiftlw $11, %k1, %k0
1807 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1808 ; KNL-NEXT: kmovw %k0, %r14d
1809 ; KNL-NEXT: kshiftlw $10, %k1, %k0
1810 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1811 ; KNL-NEXT: kmovw %k0, %r15d
1812 ; KNL-NEXT: kshiftlw $9, %k1, %k0
1813 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1814 ; KNL-NEXT: kmovw %k0, %r12d
1815 ; KNL-NEXT: kshiftlw $8, %k1, %k0
1816 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1817 ; KNL-NEXT: kmovw %k0, %r13d
1818 ; KNL-NEXT: kshiftlw $7, %k1, %k0
1819 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1820 ; KNL-NEXT: kmovw %k0, %edx
1821 ; KNL-NEXT: kshiftlw $6, %k1, %k0
1822 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1823 ; KNL-NEXT: kmovw %k0, %esi
1824 ; KNL-NEXT: kshiftlw $5, %k1, %k0
1825 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1826 ; KNL-NEXT: kmovw %k0, %ebp
1827 ; KNL-NEXT: kshiftlw $4, %k1, %k0
1828 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1829 ; KNL-NEXT: kmovw %k0, %ebx
1830 ; KNL-NEXT: kshiftlw $3, %k1, %k0
1831 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1832 ; KNL-NEXT: kmovw %k0, %eax
1833 ; KNL-NEXT: kshiftlw $2, %k1, %k0
1834 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1835 ; KNL-NEXT: kmovw %k0, %ecx
1836 ; KNL-NEXT: kshiftlw $1, %k1, %k0
1837 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1838 ; KNL-NEXT: vmovd %r10d, %xmm1
1839 ; KNL-NEXT: kmovw %k0, %r10d
1840 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1841 ; KNL-NEXT: kshiftlw $0, %k1, %k1
1842 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1843 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
1844 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
1845 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
1846 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
1847 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
1848 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
1849 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
1850 ; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
1851 ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
1852 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
1853 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
1854 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1855 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
1856 ; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
1857 ; KNL-NEXT: kmovw %k1, %eax
1858 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1859 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1860 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1861 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1862 ; KNL-NEXT: kmovw %k1, 2(%rdi)
1863 ; KNL-NEXT: kshiftlw $14, %k0, %k1
1864 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1865 ; KNL-NEXT: kmovw %k1, %r8d
1866 ; KNL-NEXT: kshiftlw $15, %k0, %k1
1867 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1868 ; KNL-NEXT: kmovw %k1, %r9d
1869 ; KNL-NEXT: kshiftlw $13, %k0, %k1
1870 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1871 ; KNL-NEXT: kmovw %k1, %r10d
1872 ; KNL-NEXT: kshiftlw $12, %k0, %k1
1873 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1874 ; KNL-NEXT: kmovw %k1, %r11d
1875 ; KNL-NEXT: kshiftlw $11, %k0, %k1
1876 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1877 ; KNL-NEXT: kmovw %k1, %r14d
1878 ; KNL-NEXT: kshiftlw $10, %k0, %k1
1879 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1880 ; KNL-NEXT: kmovw %k1, %r15d
1881 ; KNL-NEXT: kshiftlw $9, %k0, %k1
1882 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1883 ; KNL-NEXT: kmovw %k1, %r12d
1884 ; KNL-NEXT: kshiftlw $8, %k0, %k1
1885 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1886 ; KNL-NEXT: kmovw %k1, %r13d
1887 ; KNL-NEXT: kshiftlw $7, %k0, %k1
1888 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1889 ; KNL-NEXT: kmovw %k1, %edx
1890 ; KNL-NEXT: kshiftlw $6, %k0, %k1
1891 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1892 ; KNL-NEXT: kmovw %k1, %esi
1893 ; KNL-NEXT: kshiftlw $5, %k0, %k1
1894 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1895 ; KNL-NEXT: kmovw %k1, %ebp
1896 ; KNL-NEXT: kshiftlw $4, %k0, %k1
1897 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1898 ; KNL-NEXT: kmovw %k1, %ebx
1899 ; KNL-NEXT: kshiftlw $3, %k0, %k1
1900 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1901 ; KNL-NEXT: kmovw %k1, %eax
1902 ; KNL-NEXT: kshiftlw $2, %k0, %k1
1903 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1904 ; KNL-NEXT: kmovw %k1, %ecx
1905 ; KNL-NEXT: kshiftlw $1, %k0, %k1
1906 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1907 ; KNL-NEXT: vmovd %r9d, %xmm0
1908 ; KNL-NEXT: kmovw %k1, %r9d
1909 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
1910 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
1911 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
1912 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
1913 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
1914 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
1915 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
1916 ; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
1917 ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
1918 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
1919 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
1920 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1921 ; KNL-NEXT: kshiftlw $0, %k0, %k0
1922 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1923 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
1924 ; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
1925 ; KNL-NEXT: kmovw %k0, %eax
1926 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1927 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1928 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1929 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1930 ; KNL-NEXT: kmovw %k0, (%rdi)
1931 ; KNL-NEXT: popq %rbx
1932 ; KNL-NEXT: popq %r12
1933 ; KNL-NEXT: popq %r13
1934 ; KNL-NEXT: popq %r14
1935 ; KNL-NEXT: popq %r15
1936 ; KNL-NEXT: popq %rbp
1937 ; KNL-NEXT: retq
1941 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
1942 ; SKX-NEXT: vpmovb2m %zmm0, %k0
1943 ; SKX-NEXT: kmovq %k0, (%rdi)
1944 ; SKX-NEXT: retq