Lines Matching full:next
22 ; ALL-NEXT: retq
32 ; ALL-NEXT: retq
45 ; ALL-NEXT: retq
58 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
59 ; SSE-NEXT: retq
63 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
64 ; AVX-NEXT: retq
77 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
78 ; SSE-NEXT: retq
82 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
83 ; AVX-NEXT: retq
96 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
97 ; SSE-NEXT: retq
101 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
102 ; AVX1-NEXT: retq
106 ; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
107 ; AVX2-NEXT: retq
117 ; ALL-NEXT: retq
127 ; ALL-NEXT: retq
138 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
139 ; SSE-NEXT: retq
143 ; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
144 ; AVX-NEXT: retq
155 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
156 ; SSE-NEXT: retq
160 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
161 ; AVX-NEXT: retq
172 ; SSE-NEXT: pand %xmm1, %xmm0
173 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
174 ; SSE-NEXT: retq
178 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
179 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
180 ; AVX-NEXT: retq
190 ; SSE-NEXT: por %xmm1, %xmm0
191 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
192 ; SSE-NEXT: retq
196 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
197 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
198 ; AVX-NEXT: retq
208 ; SSE-NEXT: pxor %xmm1, %xmm0
209 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
210 ; SSE-NEXT: retq
214 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
215 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
216 ; AVX-NEXT: retq
226 ; SSE-NEXT: pand %xmm1, %xmm0
227 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
228 ; SSE-NEXT: retq
232 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
233 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
234 ; AVX-NEXT: retq
244 ; SSE-NEXT: por %xmm1, %xmm0
245 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
246 ; SSE-NEXT: retq
250 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
251 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
252 ; AVX-NEXT: retq
262 ; SSE-NEXT: pxor %xmm1, %xmm0
263 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
264 ; SSE-NEXT: retq
268 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
269 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
270 ; AVX-NEXT: retq
284 ; SSE2-NEXT: pand %xmm1, %xmm0
285 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
286 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
287 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
288 ; SSE2-NEXT: retq
292 ; SSSE3-NEXT: pand %xmm1, %xmm0
293 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
294 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
295 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
296 ; SSSE3-NEXT: retq
300 ; SSE41-NEXT: pand %xmm1, %xmm0
301 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
302 ; SSE41-NEXT: retq
306 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
307 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
308 ; AVX1-NEXT: retq
312 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
313 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
314 ; AVX2-NEXT: retq
324 ; SSE2-NEXT: por %xmm1, %xmm0
325 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
326 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
327 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
328 ; SSE2-NEXT: retq
332 ; SSSE3-NEXT: por %xmm1, %xmm0
333 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
334 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
335 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
336 ; SSSE3-NEXT: retq
340 ; SSE41-NEXT: por %xmm1, %xmm0
341 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
342 ; SSE41-NEXT: retq
346 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
347 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
348 ; AVX1-NEXT: retq
352 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
353 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
354 ; AVX2-NEXT: retq
364 ; SSE2-NEXT: xorps %xmm1, %xmm0
365 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
366 ; SSE2-NEXT: retq
370 ; SSSE3-NEXT: xorps %xmm1, %xmm0
371 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
372 ; SSSE3-NEXT: retq
376 ; SSE41-NEXT: pxor %xmm1, %xmm0
377 ; SSE41-NEXT: pxor %xmm1, %xmm1
378 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
379 ; SSE41-NEXT: retq
383 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
384 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
385 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
386 ; AVX1-NEXT: retq
390 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
391 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
392 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
393 ; AVX2-NEXT: retq
403 ; SSE2-NEXT: pand %xmm1, %xmm0
404 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
405 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
406 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
407 ; SSE2-NEXT: retq
411 ; SSSE3-NEXT: pand %xmm1, %xmm0
412 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
413 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
414 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
415 ; SSSE3-NEXT: retq
419 ; SSE41-NEXT: pand %xmm1, %xmm0
420 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
421 ; SSE41-NEXT: retq
425 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
426 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
427 ; AVX1-NEXT: retq
431 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
432 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
433 ; AVX2-NEXT: retq
443 ; SSE2-NEXT: por %xmm1, %xmm0
444 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
445 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
446 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
447 ; SSE2-NEXT: retq
451 ; SSSE3-NEXT: por %xmm1, %xmm0
452 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
453 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
454 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
455 ; SSSE3-NEXT: retq
459 ; SSE41-NEXT: por %xmm1, %xmm0
460 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
461 ; SSE41-NEXT: retq
465 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
466 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
467 ; AVX1-NEXT: retq
471 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
472 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
473 ; AVX2-NEXT: retq
483 ; SSE2-NEXT: xorps %xmm1, %xmm0
484 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
485 ; SSE2-NEXT: retq
489 ; SSSE3-NEXT: xorps %xmm1, %xmm0
490 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
491 ; SSSE3-NEXT: retq
495 ; SSE41-NEXT: pxor %xmm1, %xmm0
496 ; SSE41-NEXT: pxor %xmm1, %xmm1
497 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
498 ; SSE41-NEXT: retq
502 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
503 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
504 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
505 ; AVX1-NEXT: retq
509 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
510 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
511 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
512 ; AVX2-NEXT: retq
522 ; SSE2-NEXT: pand %xmm1, %xmm0
523 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
524 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
525 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
526 ; SSE2-NEXT: retq
530 ; SSSE3-NEXT: pand %xmm1, %xmm0
531 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
532 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
533 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
534 ; SSSE3-NEXT: retq
538 ; SSE41-NEXT: pand %xmm1, %xmm0
539 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
540 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
541 ; SSE41-NEXT: retq
545 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
546 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
547 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
548 ; AVX1-NEXT: retq
552 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
553 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
554 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
555 ; AVX2-NEXT: retq
565 ; SSE2-NEXT: por %xmm1, %xmm0
566 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
567 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
568 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
569 ; SSE2-NEXT: retq
573 ; SSSE3-NEXT: por %xmm1, %xmm0
574 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
575 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
576 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
577 ; SSSE3-NEXT: retq
581 ; SSE41-NEXT: por %xmm1, %xmm0
582 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
583 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
584 ; SSE41-NEXT: retq
588 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
589 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
590 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
591 ; AVX1-NEXT: retq
595 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
596 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
597 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
598 ; AVX2-NEXT: retq
608 ; SSE2-NEXT: pxor %xmm1, %xmm0
609 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
610 ; SSE2-NEXT: pxor %xmm1, %xmm1
611 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
612 ; SSE2-NEXT: retq
616 ; SSSE3-NEXT: pxor %xmm1, %xmm0
617 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
618 ; SSSE3-NEXT: pxor %xmm1, %xmm1
619 ; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
620 ; SSSE3-NEXT: retq
624 ; SSE41-NEXT: pxor %xmm1, %xmm0
625 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
626 ; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
627 ; SSE41-NEXT: retq
631 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
632 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
633 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
634 ; AVX-NEXT: retq
644 ; SSE2-NEXT: pand %xmm1, %xmm0
645 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
646 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
647 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
648 ; SSE2-NEXT: retq
652 ; SSSE3-NEXT: pand %xmm1, %xmm0
653 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
654 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
655 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
656 ; SSSE3-NEXT: retq
660 ; SSE41-NEXT: pand %xmm1, %xmm0
661 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
662 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
663 ; SSE41-NEXT: retq
667 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
668 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
669 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
670 ; AVX1-NEXT: retq
674 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
675 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
676 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
677 ; AVX2-NEXT: retq
687 ; SSE2-NEXT: por %xmm1, %xmm0
688 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
689 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
690 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
691 ; SSE2-NEXT: retq
695 ; SSSE3-NEXT: por %xmm1, %xmm0
696 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
697 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
698 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
699 ; SSSE3-NEXT: retq
703 ; SSE41-NEXT: por %xmm1, %xmm0
704 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
705 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
706 ; SSE41-NEXT: retq
710 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
711 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
712 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
713 ; AVX1-NEXT: retq
717 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
718 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
719 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
720 ; AVX2-NEXT: retq
730 ; SSE2-NEXT: pxor %xmm1, %xmm0
731 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
732 ; SSE2-NEXT: pxor %xmm0, %xmm0
733 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
734 ; SSE2-NEXT: retq
738 ; SSSE3-NEXT: pxor %xmm1, %xmm0
739 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
740 ; SSSE3-NEXT: pxor %xmm0, %xmm0
741 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
742 ; SSSE3-NEXT: retq
746 ; SSE41-NEXT: pxor %xmm1, %xmm0
747 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
748 ; SSE41-NEXT: pxor %xmm0, %xmm0
749 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
750 ; SSE41-NEXT: retq
754 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
755 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
756 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
757 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
758 ; AVX1-NEXT: retq
762 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
763 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
764 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
765 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
766 ; AVX2-NEXT: retq
776 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
777 ; SSE-NEXT: retq
781 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
782 ; AVX-NEXT: retq
791 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
792 ; SSE-NEXT: retq
796 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
797 ; AVX-NEXT: retq
806 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
807 ; SSE-NEXT: retq
811 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
812 ; AVX-NEXT: retq
821 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
822 ; SSE-NEXT: retq
826 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
827 ; AVX1-NEXT: retq
831 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
832 ; AVX2-NEXT: retq
841 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
842 ; SSE-NEXT: retq
846 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
847 ; AVX-NEXT: retq
856 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
857 ; SSE-NEXT: retq
861 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
862 ; AVX-NEXT: retq
871 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
872 ; SSE-NEXT: retq
876 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
877 ; AVX-NEXT: retq
886 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
887 ; SSE-NEXT: retq
891 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
892 ; AVX-NEXT: retq
901 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
902 ; SSE-NEXT: retq
906 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
907 ; AVX-NEXT: retq
916 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
917 ; SSE-NEXT: retq
921 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
922 ; AVX-NEXT: retq
931 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
932 ; SSE-NEXT: retq
936 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
937 ; AVX-NEXT: retq
946 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
947 ; SSE-NEXT: retq
951 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
952 ; AVX1-NEXT: retq
956 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
957 ; AVX2-NEXT: retq
967 ; ALL-NEXT: retq
977 ; SSE-NEXT: movaps %xmm1, %xmm0
978 ; SSE-NEXT: retq
982 ; AVX-NEXT: vmovaps %xmm1, %xmm0
983 ; AVX-NEXT: retq
999 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1000 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
1001 ; SSE2-NEXT: movaps %xmm1, %xmm0
1002 ; SSE2-NEXT: retq
1006 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
1007 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
1008 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1009 ; SSSE3-NEXT: retq
1013 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1014 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1015 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1016 ; SSE41-NEXT: retq
1020 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1021 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1022 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1023 ; AVX1-NEXT: retq
1027 ; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
1028 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1029 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1030 ; AVX2-NEXT: retq
1039 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1040 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
1041 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1042 ; SSE2-NEXT: retq
1046 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1047 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
1048 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1049 ; SSSE3-NEXT: retq
1053 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1054 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1055 ; SSE41-NEXT: retq
1059 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1060 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
1061 ; AVX1-NEXT: retq
1065 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1066 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1067 ; AVX2-NEXT: retq
1076 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
1077 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
1078 ; SSE2-NEXT: retq
1082 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
1083 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
1084 ; SSSE3-NEXT: retq
1088 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1089 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1090 ; SSE41-NEXT: retq
1094 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1095 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1096 ; AVX1-NEXT: retq
1100 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1101 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
1102 ; AVX2-NEXT: retq
1111 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
1112 ; SSE-NEXT: retq
1116 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
1117 ; AVX-NEXT: retq
1126 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1127 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
1128 ; SSE2-NEXT: retq
1132 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1133 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
1134 ; SSSE3-NEXT: retq
1138 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1139 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1140 ; SSE41-NEXT: retq
1144 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
1145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1146 ; AVX1-NEXT: retq
1150 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
1151 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
1152 ; AVX2-NEXT: retq
1161 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
1162 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
1163 ; SSE2-NEXT: movaps %xmm1, %xmm0
1164 ; SSE2-NEXT: retq
1168 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
1169 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
1170 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1171 ; SSSE3-NEXT: retq
1175 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1176 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1177 ; SSE41-NEXT: retq
1181 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1182 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1183 ; AVX1-NEXT: retq
1187 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
1188 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
1189 ; AVX2-NEXT: retq
1198 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1199 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
1200 ; SSE2-NEXT: retq
1204 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1205 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
1206 ; SSSE3-NEXT: retq
1210 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1211 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1212 ; SSE41-NEXT: retq
1216 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1217 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1218 ; AVX1-NEXT: retq
1222 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1223 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1224 ; AVX2-NEXT: retq
1237 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1238 ; SSE-NEXT: retq
1242 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1243 ; AVX-NEXT: retq
1252 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1253 ; SSE-NEXT: retq
1257 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1258 ; AVX-NEXT: retq
1267 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
1268 ; SSE-NEXT: retq
1272 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
1273 ; AVX-NEXT: retq
1282 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1283 ; SSE-NEXT: retq
1287 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1288 ; AVX1-NEXT: retq
1292 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1293 ; AVX2-NEXT: retq
1302 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1303 ; SSE-NEXT: retq
1307 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1308 ; AVX-NEXT: retq
1317 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1318 ; SSE-NEXT: retq
1322 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1323 ; AVX1-NEXT: retq
1327 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1328 ; AVX2-NEXT: retq
1337 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1338 ; SSE-NEXT: retq
1342 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1343 ; AVX-NEXT: retq
1352 ; SSE-NEXT: movaps %xmm1, %xmm0
1353 ; SSE-NEXT: retq
1357 ; AVX-NEXT: vmovaps %xmm1, %xmm0
1358 ; AVX-NEXT: retq
1367 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1368 ; SSE2-NEXT: movaps %xmm1, %xmm0
1369 ; SSE2-NEXT: retq
1373 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1374 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1375 ; SSSE3-NEXT: retq
1379 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1380 ; SSE41-NEXT: retq
1384 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1385 ; AVX-NEXT: retq
1394 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1395 ; SSE-NEXT: retq
1399 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1400 ; AVX-NEXT: retq
1409 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1410 ; SSE-NEXT: movapd %xmm1, %xmm0
1411 ; SSE-NEXT: retq
1415 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1416 ; AVX-NEXT: retq
1425 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1426 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1427 ; SSE2-NEXT: retq
1431 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1432 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1433 ; SSSE3-NEXT: retq
1437 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1438 ; SSE41-NEXT: retq
1442 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1443 ; AVX-NEXT: retq
1452 ; SSE-NEXT: movaps %xmm1, %xmm0
1453 ; SSE-NEXT: retq
1457 ; AVX-NEXT: vmovaps %xmm1, %xmm0
1458 ; AVX-NEXT: retq
1467 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1468 ; SSE2-NEXT: movaps %xmm1, %xmm0
1469 ; SSE2-NEXT: retq
1473 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1474 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1475 ; SSSE3-NEXT: retq
1479 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1480 ; SSE41-NEXT: retq
1484 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1485 ; AVX1-NEXT: retq
1489 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1490 ; AVX2-NEXT: retq
1499 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1500 ; SSE-NEXT: retq
1504 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1505 ; AVX-NEXT: retq
1514 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1515 ; SSE-NEXT: movdqa %xmm1, %xmm0
1516 ; SSE-NEXT: retq
1520 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
1521 ; AVX-NEXT: retq
1530 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1531 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1532 ; SSE2-NEXT: retq
1536 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1537 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1538 ; SSSE3-NEXT: retq
1542 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1543 ; SSE41-NEXT: retq
1547 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1548 ; AVX1-NEXT: retq
1552 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1553 ; AVX2-NEXT: retq
1562 ; ALL-NEXT: retq
1571 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1572 ; SSE2-NEXT: movaps %xmm1, %xmm0
1573 ; SSE2-NEXT: retq
1577 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1578 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1579 ; SSSE3-NEXT: retq
1583 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1584 ; SSE41-NEXT: retq
1588 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1589 ; AVX-NEXT: retq
1598 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1599 ; SSE-NEXT: retq
1603 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1604 ; AVX-NEXT: retq
1613 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1614 ; SSE-NEXT: retq
1618 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1619 ; AVX-NEXT: retq
1628 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1629 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1630 ; SSE2-NEXT: retq
1634 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1635 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1636 ; SSSE3-NEXT: retq
1640 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1641 ; SSE41-NEXT: retq
1645 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1646 ; AVX-NEXT: retq
1655 ; ALL-NEXT: retq
1664 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1665 ; SSE2-NEXT: movaps %xmm1, %xmm0
1666 ; SSE2-NEXT: retq
1670 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
1671 ; SSSE3-NEXT: movaps %xmm1, %xmm0
1672 ; SSSE3-NEXT: retq
1676 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1677 ; SSE41-NEXT: retq
1681 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1682 ; AVX1-NEXT: retq
1686 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1687 ; AVX2-NEXT: retq
1696 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1697 ; SSE-NEXT: retq
1701 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1702 ; AVX-NEXT: retq
1711 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1712 ; SSE-NEXT: retq
1716 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1717 ; AVX-NEXT: retq
1726 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1727 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1728 ; SSE2-NEXT: retq
1732 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
1733 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
1734 ; SSSE3-NEXT: retq
1738 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1739 ; SSE41-NEXT: retq
1743 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1744 ; AVX1-NEXT: retq
1748 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1749 ; AVX2-NEXT: retq
1758 ; SSE-NEXT: movdqa %xmm0, %xmm2
1759 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
1760 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1761 ; SSE-NEXT: movdqa %xmm2, (%rdi)
1762 ; SSE-NEXT: retq
1766 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1767 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1768 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1769 ; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
1770 ; AVX1-NEXT: vzeroupper
1771 ; AVX1-NEXT: retq
1775 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1776 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1777 ; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1778 ; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
1779 ; AVX2-NEXT: vzeroupper
1780 ; AVX2-NEXT: retq
1790 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1791 ; SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1792 ; SSE-NEXT: retq
1796 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
1797 ; AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1798 ; AVX-NEXT: retq
1810 ; SSE-NEXT: movups %xmm0, (%rdi)
1811 ; SSE-NEXT: retq
1815 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1816 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1],xmm1[0],xmm0[3]
1817 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
1818 ; AVX-NEXT: vmovups %xmm0, (%rdi)
1819 ; AVX-NEXT: vzeroupper
1820 ; AVX-NEXT: retq
1835 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
1836 ; SSE-NEXT: movaps %xmm1, %xmm0
1837 ; SSE-NEXT: retq
1841 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0]
1842 ; AVX-NEXT: retq
1851 ; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
1852 ; SSE2-NEXT: movaps %xmm1, %xmm0
1853 ; SSE2-NEXT: retq
1857 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
1858 ; SSSE3-NEXT: retq
1862 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
1863 ; SSE41-NEXT: retq
1867 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
1868 ; AVX-NEXT: retq
1877 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1878 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
1879 ; SSE2-NEXT: retq
1883 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
1884 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
1885 ; SSSE3-NEXT: retq
1889 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1890 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
1891 ; SSE41-NEXT: retq
1895 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
1896 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
1897 ; AVX-NEXT: retq
1906 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
1907 ; SSE-NEXT: movaps %xmm1, %xmm0
1908 ; SSE-NEXT: retq
1912 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3]
1913 ; AVX-NEXT: retq
1925 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1926 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1927 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1928 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1929 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1930 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1931 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1932 ; SSE2-NEXT: retq
1936 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1937 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1938 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1939 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1940 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1941 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1942 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
1943 ; SSSE3-NEXT: retq
1947 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1948 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1949 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
1950 ; SSE41-NEXT: retq
1954 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1955 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1956 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1957 ; AVX1-NEXT: retq
1961 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1962 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1963 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1964 ; AVX2-NEXT: retq
1975 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1976 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1977 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1978 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1979 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1980 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1981 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1982 ; SSE2-NEXT: retq
1986 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1987 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1988 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1989 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1990 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1991 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1992 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1993 ; SSSE3-NEXT: retq
1997 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1998 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1999 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2000 ; SSE41-NEXT: retq
2004 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2005 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2006 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2007 ; AVX-NEXT: retq
2018 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2019 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2020 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2021 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2022 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2023 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2024 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2025 ; SSE2-NEXT: retq
2029 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2030 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2031 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2032 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2033 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2034 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2035 ; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2036 ; SSSE3-NEXT: retq
2040 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2041 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2042 ; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2043 ; SSE41-NEXT: retq
2047 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2048 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2049 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2050 ; AVX-NEXT: retq
2061 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2062 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2063 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2064 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2065 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2066 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2067 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
2068 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
2069 ; SSE2-NEXT: retq
2073 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2074 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2075 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2076 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2077 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2078 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2079 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
2080 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
2081 ; SSSE3-NEXT: retq
2085 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2086 ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2087 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
2088 ; SSE41-NEXT: retq
2092 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2093 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2094 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
2095 ; AVX1-NEXT: retq
2099 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2100 ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2101 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
2102 ; AVX2-NEXT: retq
2143 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2144 ; SSE2-NEXT: retq
2148 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2149 ; SSSE3-NEXT: retq
2153 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2154 ; SSE41-NEXT: retq
2158 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2159 ; AVX-NEXT: retq
2168 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
2169 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
2170 ; SSE2-NEXT: movaps %xmm1, %xmm0
2171 ; SSE2-NEXT: retq
2175 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
2176 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
2177 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2178 ; SSSE3-NEXT: retq
2182 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2183 ; SSE41-NEXT: retq
2187 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2188 ; AVX-NEXT: retq
2197 ; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2198 ; SSE2-NEXT: movaps %xmm1, %xmm0
2199 ; SSE2-NEXT: retq
2203 ; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
2204 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2205 ; SSSE3-NEXT: retq
2209 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2210 ; SSE41-NEXT: retq
2214 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
2215 ; AVX-NEXT: retq
2225 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2226 ; SSE-NEXT: movdqa %xmm1, %xmm0
2227 ; SSE-NEXT: retq
2231 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2232 ; AVX-NEXT: retq
2241 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2242 ; SSE-NEXT: movdqa %xmm1, %xmm0
2243 ; SSE-NEXT: retq
2247 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2248 ; AVX-NEXT: retq
2257 ; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2258 ; SSE-NEXT: movdqa %xmm1, %xmm0
2259 ; SSE-NEXT: retq
2263 ; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2264 ; AVX-NEXT: retq
2277 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2278 ; SSE2-NEXT: retq
2282 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2283 ; SSSE3-NEXT: retq
2287 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2288 ; SSE41-NEXT: retq
2292 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2293 ; AVX-NEXT: retq
2302 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2303 ; SSE-NEXT: retq
2307 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2308 ; AVX-NEXT: retq
2317 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2318 ; SSE-NEXT: retq
2322 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2323 ; AVX-NEXT: retq
2332 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2333 ; SSE-NEXT: movapd %xmm1, %xmm0
2334 ; SSE-NEXT: retq
2338 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2339 ; AVX-NEXT: retq
2348 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2349 ; SSE2-NEXT: movapd %xmm1, %xmm0
2350 ; SSE2-NEXT: retq
2354 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2355 ; SSSE3-NEXT: movapd %xmm1, %xmm0
2356 ; SSSE3-NEXT: retq
2360 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2361 ; SSE41-NEXT: retq
2365 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2366 ; AVX-NEXT: retq
2379 ; ALL-NEXT: retq
2388 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2389 ; SSE2-NEXT: retq
2393 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2394 ; SSSE3-NEXT: retq
2398 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2399 ; SSE41-NEXT: retq
2403 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2404 ; AVX-NEXT: retq
2413 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2414 ; SSE2-NEXT: retq
2418 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2419 ; SSSE3-NEXT: retq
2423 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2424 ; SSE41-NEXT: retq
2428 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2429 ; AVX-NEXT: retq
2438 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2439 ; SSE-NEXT: retq
2443 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
2444 ; AVX-NEXT: retq
2453 ; ALL-NEXT: retq
2462 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2463 ; SSE2-NEXT: retq
2467 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2468 ; SSSE3-NEXT: retq
2472 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2473 ; SSE41-NEXT: retq
2477 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2478 ; AVX-NEXT: retq
2487 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2488 ; SSE-NEXT: retq
2492 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2493 ; AVX-NEXT: retq
2502 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2503 ; SSE-NEXT: retq
2507 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2508 ; AVX-NEXT: retq
2517 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2518 ; SSE-NEXT: movapd %xmm1, %xmm0
2519 ; SSE-NEXT: retq
2523 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
2524 ; AVX-NEXT: retq
2533 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2534 ; SSE2-NEXT: movapd %xmm1, %xmm0
2535 ; SSE2-NEXT: retq
2539 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2540 ; SSSE3-NEXT: movapd %xmm1, %xmm0
2541 ; SSSE3-NEXT: retq
2545 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2546 ; SSE41-NEXT: retq
2550 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2551 ; AVX-NEXT: retq
2570 ; ALL-NEXT: retq
2579 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2580 ; SSE2-NEXT: retq
2584 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2585 ; SSSE3-NEXT: retq
2589 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2590 ; SSE41-NEXT: retq
2594 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2595 ; AVX-NEXT: retq
2604 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2605 ; SSE2-NEXT: retq
2609 ; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2610 ; SSSE3-NEXT: retq
2614 ; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
2615 ; SSE41-NEXT: retq
2619 ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2620 ; AVX-NEXT: retq
2629 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2630 ; SSE-NEXT: retq
2634 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
2635 ; AVX-NEXT: retq
2644 ; ALL-NEXT: retq
2658 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
2659 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0]
2660 ; SSE-NEXT: movdqa %xmm0, %xmm1
2661 ; SSE-NEXT: retq
2665 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2666 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2667 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2668 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2669 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
2670 ; AVX1-NEXT: retq
2674 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
2675 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2676 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
2677 ; AVX2-NEXT: retq
2686 ; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
2687 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0]
2688 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
2689 ; SSE-NEXT: retq
2693 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2694 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2695 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2696 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
2697 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2698 ; AVX1-NEXT: retq
2702 ; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
2703 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
2704 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
2705 ; AVX2-NEXT: retq
2714 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
2715 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
2716 ; SSE2-NEXT: movaps %xmm1, %xmm0
2717 ; SSE2-NEXT: retq
2721 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
2722 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
2723 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2724 ; SSSE3-NEXT: retq
2728 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
2729 ; SSE41-NEXT: retq
2733 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
2734 ; AVX-NEXT: retq
2744 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
2745 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
2746 ; SSE2-NEXT: movaps %xmm1, %xmm0
2747 ; SSE2-NEXT: retq
2751 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
2752 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
2753 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2754 ; SSSE3-NEXT: retq
2758 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
2759 ; SSE41-NEXT: retq
2763 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
2764 ; AVX-NEXT: retq
2774 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2775 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2776 ; SSE2-NEXT: retq
2780 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2781 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2782 ; SSSE3-NEXT: retq
2786 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
2787 ; SSE41-NEXT: retq
2791 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
2792 ; AVX-NEXT: retq
2802 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
2803 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
2804 ; SSE2-NEXT: retq
2808 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
2809 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
2810 ; SSSE3-NEXT: retq
2814 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
2815 ; SSE41-NEXT: retq
2819 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
2820 ; AVX-NEXT: retq
2831 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2832 ; SSE2-NEXT: xorps %xmm1, %xmm1
2833 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2834 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2835 ; SSE2-NEXT: movaps %xmm0, (%rsi)
2836 ; SSE2-NEXT: retq
2840 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2841 ; SSSE3-NEXT: xorps %xmm1, %xmm1
2842 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
2843 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2844 ; SSSE3-NEXT: movaps %xmm0, (%rsi)
2845 ; SSSE3-NEXT: retq
2849 ; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2850 ; SSE41-NEXT: xorpd %xmm1, %xmm1
2851 ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
2852 ; SSE41-NEXT: movapd %xmm1, (%rsi)
2853 ; SSE41-NEXT: retq
2857 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2858 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2859 ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
2860 ; AVX-NEXT: vmovapd %xmm0, (%rsi)
2861 ; AVX-NEXT: retq
2874 ; SSE-NEXT: movaps %xmm0, %xmm1
2875 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
2876 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
2877 ; SSE-NEXT: addps %xmm0, %xmm1
2878 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2879 ; SSE-NEXT: retq
2883 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
2884 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
2885 ; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1
2886 ; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2887 ; AVX-NEXT: retq
2899 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2900 ; SSE2-NEXT: movaps %xmm0, %xmm2
2901 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
2902 ; SSE2-NEXT: addps %xmm0, %xmm2
2903 ; SSE2-NEXT: movaps %xmm2, %xmm0
2904 ; SSE2-NEXT: retq
2908 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2909 ; SSSE3-NEXT: movaps %xmm0, %xmm2
2910 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
2911 ; SSSE3-NEXT: addps %xmm0, %xmm2
2912 ; SSSE3-NEXT: movaps %xmm2, %xmm0
2913 ; SSSE3-NEXT: retq
2917 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2918 ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
2919 ; SSE41-NEXT: addps %xmm1, %xmm0
2920 ; SSE41-NEXT: retq
2924 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,1,2]
2925 ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
2926 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
2927 ; AVX-NEXT: retq
2938 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
2939 ; SSE2-NEXT: movapd %xmm2, %xmm0
2940 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
2941 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2]
2942 ; SSE2-NEXT: movaps %xmm3, %xmm1
2943 ; SSE2-NEXT: retq
2947 ; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
2948 ; SSSE3-NEXT: movapd %xmm2, %xmm0
2949 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
2950 ; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2]
2951 ; SSSE3-NEXT: movaps %xmm3, %xmm1
2952 ; SSSE3-NEXT: retq
2956 ; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
2957 ; SSE41-NEXT: movapd %xmm0, %xmm1
2958 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2]
2959 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2]
2960 ; SSE41-NEXT: movaps %xmm1, %xmm0
2961 ; SSE41-NEXT: movaps %xmm3, %xmm1
2962 ; SSE41-NEXT: retq
2966 ; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
2967 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
2968 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6]
2969 ; AVX1-NEXT: retq
2973 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
2974 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
2975 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
2976 ; AVX2-NEXT: retq