Lines matching full:xmm2:
316 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
317 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
318 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
334 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
335 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
336 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
543 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
545 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
547 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
561 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
563 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
565 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
642 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
644 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
646 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
661 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
663 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
665 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
712 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
713 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
734 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
735 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
738 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
742 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
757 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
758 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
762 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
767 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
783 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
785 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
787 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
808 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
809 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
810 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
826 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
827 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
828 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
844 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
845 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
846 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
862 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
864 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
880 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
881 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
882 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
898 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
899 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
900 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
916 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
917 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
918 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
933 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
935 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
937 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
951 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
953 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
955 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
969 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
971 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
973 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
989 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
991 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
993 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1319 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1321 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1326 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1328 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1443 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1444 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1477 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1479 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1481 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1495 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1497 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1499 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1513 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1515 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
1517 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1533 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1535 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1629 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1631 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
1635 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1677 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1680 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1689 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1692 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1779 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1781 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1789 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1791 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1875 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1876 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1878 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1885 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
1886 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1888 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1899 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1901 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1909 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1911 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1923 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1925 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1933 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1935 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1947 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1949 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1957 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1959 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1971 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1973 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1981 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1983 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
1995 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1997 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2005 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2007 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2019 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2020 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2022 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2029 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
2030 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2032 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2043 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2044 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2046 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2053 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
2054 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2056 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2067 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2068 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2070 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2077 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
2078 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2080 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2091 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2092 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2094 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2101 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
2102 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2104 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2115 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2117 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2125 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2127 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2139 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2141 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2149 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2151 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2163 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2165 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2173 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2175 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2187 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
2189 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2197 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
2199 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2237 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2239 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2247 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2249 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2261 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2263 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2271 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2273 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2285 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2287 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2295 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2297 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2309 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
2310 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2311 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
2328 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2336 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
2337 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
2349 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
2350 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2351 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2367 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2368 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2370 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2377 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
2378 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2380 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2413 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2414 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2416 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2423 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
2424 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2426 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2436 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2438 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2439 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2448 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2449 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2452 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
2453 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2463 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2466 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
2470 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2477 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
2479 ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2490 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2492 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2493 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2502 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2503 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2506 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
2507 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2517 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2520 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2521 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2532 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
2533 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2534 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2544 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2545 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
2551 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
2560 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2563 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
2564 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
2565 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2566 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2578 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2579 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
2583 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2592 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2594 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4
2595 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2609 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2610 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2615 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2623 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2624 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2627 ; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
2628 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
2639 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2641 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2642 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2651 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2652 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2655 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
2656 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2666 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2668 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2669 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2678 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2679 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2682 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
2683 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2693 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2694 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
2698 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
2703 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2724 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2725 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
2726 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
2748 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2750 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2752 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2755 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2784 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2787 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
2790 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2805 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2808 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
2810 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2828 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
2831 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2846 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2847 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2861 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2862 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2863 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2873 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2875 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
2878 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2895 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2896 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
2897 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
2898 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
2902 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2920 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2922 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2925 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
2929 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2945 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2947 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
2952 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2971 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2973 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
2975 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3045 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3047 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3052 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3071 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3073 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3075 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3145 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3147 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3152 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3173 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
3175 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3191 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3196 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3217 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
3219 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3233 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3234 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
3237 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
3241 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
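For context, here is a minimal LLVM IR sketch (not taken from the file above) of the kind of <16 x i16> shuffle these AVX1/AVX2 CHECK lines appear to verify; the assumption that the matches come from a 256-bit v16i16 shuffle-lowering test, and the function name below, are illustrative rather than confirmed by the listing. A per-lane splat such as this is what typically produces the repeated [0,1,0,1,...] vpshufb byte mask matched around line 316:

; Illustrative only: put element 0 of %a in every slot of the low 128-bit lane
; and element 16 (element 0 of %b) in every slot of the high lane. On AVX1 this
; is expected to lower to one vpshufb per lane, reusing the same byte mask held
; in xmm2, followed by a vinsertf128 to rebuild the ymm result.
define <16 x i16> @splat_lane_heads(<16 x i16> %a, <16 x i16> %b) {
  %s = shufflevector <16 x i16> %a, <16 x i16> %b,
       <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
                   i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <16 x i16> %s
}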