; (code-browser navigation header neutralized as a comment: "Home | History | Annotate | Download | only in X86")
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
     10 
; Zero-extend the low 8 bytes of a v16i8 to v8i16.
; SSE2/SSSE3 interleave with a zeroed register (punpcklbw); SSE4.1 and AVX
; select a single pmovzxbw. CHECK lines are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}
     38 
; PR17654
; Full zero-extension of v16i8 to v16i16 (result spans two XMM / one YMM reg).
; SSE uses punpcklbw/punpckhbw against zero; SSE4.1/AVX1 split into two
; pmovzxbw halves; AVX2/AVX512 use a single 256-bit vpmovzxbw.
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_16i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}
     86 
; Zero-extension of v32i8 to v32i16 (four XMM / two YMM / one ZMM result).
; SSE expands both input registers with punpcklbw/punpckhbw; SSE4.1/AVX1/AVX2
; split into pmovzxbw halves; only AVX512BW can use one 512-bit vpmovzxbw.
define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
; SSE2-LABEL: zext_32i8_to_32i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_32i8_to_32i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_32i8_to_32i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    movdqa %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_32i8_to_32i16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_32i8_to_32i16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: zext_32i8_to_32i16:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vmovdqa %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: zext_32i8_to_32i16:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    retq
entry:
  %B = zext <32 x i8> %A to <32 x i16>
  ret <32 x i16> %B
}
    162 
; Zero-extend the low 4 bytes of a v16i8 to v4i32.
; SSE2/SSSE3 need two interleaves with zero (bytes->words->dwords);
; SSE4.1/AVX use a single pmovzxbd.
define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}
    192 
; Zero-extend the low 8 bytes of a v16i8 to v8i32 (two XMM / one YMM result).
; SSE2/SSSE3: bytes->words then low/high word->dword unpacks; SSE4.1/AVX1:
; two pmovzxbd halves; AVX2/AVX512: a single 256-bit vpmovzxbd.
define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}
    244 
; Full zero-extension of v16i8 to v16i32 (four XMM / two YMM / one ZMM).
; SSE uses nested unpacks with zero; SSE4.1/AVX1 split into four pmovzxbd
; quarters; AVX2 into two 256-bit halves; AVX512 uses one 512-bit vpmovzxbd.
define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_16i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_16i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_16i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_16i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_16i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i8> %A to <16 x i32>
  ret <16 x i32> %B
}
    318 
; Zero-extend the low 2 bytes of a v16i8 to v2i64.
; SSE2 needs three unpacks with zero; SSSE3 can do it with one pshufb
; (0x80 lanes produce zeros); SSE4.1/AVX use a single pmovzxbq.
define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_16i8_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}
    347 
; Zero-extend the low 4 bytes of a v16i8 to v4i64 (two XMM / one YMM result).
; SSE2 chains unpacks with zero; SSSE3 uses two pshufb masks; SSE4.1/AVX1
; split into two pmovzxbq halves (psrld $16 exposes bytes 2-3);
; AVX2/AVX512 use a single 256-bit vpmovzxbq.
define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}
    397 
; Zero-extend the low 8 bytes of a v16i8 to v8i64 (four XMM / two YMM / one ZMM).
; SSE2 chains unpacks; SSSE3 reuses two pshufb mask constants across both
; halves; SSE4.1/AVX1 use four pmovzxbq quarters (psrld/psrlq/pshufd expose
; byte pairs); AVX2 uses two 256-bit halves; AVX512 one 512-bit vpmovzxbq.
define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i8_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSSE3-NEXT:    pshufb %xmm5, %xmm1
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i8_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrlq $48, %xmm0
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i8_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i8_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i8_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i64>
  ret <8 x i64> %C
}
    473 
; Zext of the low 4 i16 lanes of an <8 x i16> to <4 x i32>. Pre-SSE4.1
; targets interleave with a zeroed register (pxor + punpcklwd); SSE4.1 and
; all AVX targets use a single pmovzxwd.
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}
    501 
; Full <8 x i16> -> <8 x i32> zext. Pre-SSE4.1 splits into low/high halves
; with punpcklwd/punpckhwd against zero; SSE4.1 and AVX1 need two 128-bit
; pmovzxwd (plus an insertf128 on AVX1); AVX2/AVX512 do it in one ymm
; vpmovzxwd.
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}
    548 
; <16 x i16> -> <16 x i32> zext (four xmm results pre-AVX, two ymm on
; AVX1/AVX2). Only AVX512 reaches a single zmm vpmovzxwd from the ymm
; source.
define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i16_to_16i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i16_to_16i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i16_to_16i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    movdqa %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i16_to_16i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i16_to_16i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i16_to_16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i16> %A to <16 x i32>
  ret <16 x i32> %B
}
    616 
; Zext of the low 2 i16 lanes to <2 x i64>. Pre-SSE4.1 chains two unpacks
; against zero (word->dword, dword->qword); SSE4.1/AVX use one pmovzxwq.
define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}
    646 
; Zext of the low 4 i16 lanes to <4 x i64>. AVX2/AVX512 fold the whole
; widening into a single ymm vpmovzxwq; narrower targets build it from
; unpacks or paired pmovzxwq.
define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}
    698 
; Full <8 x i16> -> <8 x i64> zext (four xmm results pre-AVX, two ymm on
; AVX1/AVX2, one zmm vpmovzxwq on AVX512).
define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm3
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i64>
  ret <8 x i64> %B
}
    772 
; Zext of the low 2 i32 lanes to <2 x i64>. Pre-SSE4.1 uses unpcklps with a
; zeroed register; SSE4.1/AVX use a single pmovzxdq.
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}
    800 
; Full <4 x i32> -> <4 x i64> zext: unpcklps/unpckhps halves pre-SSE4.1,
; two pmovzxdq on SSE4.1/AVX1, one ymm vpmovzxdq on AVX2/AVX512.
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm0, %xmm1
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_4i32_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}
    847 
; Full <8 x i32> -> <8 x i64> zext (four xmm results pre-AVX, two ymm on
; AVX1/AVX2, one zmm vpmovzxdq on AVX512).
define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i32_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm1, %xmm3
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    xorps %xmm4, %xmm4
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movaps %xmm3, %xmm2
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i32_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm1, %xmm3
; SSSE3-NEXT:    movaps %xmm0, %xmm1
; SSSE3-NEXT:    xorps %xmm4, %xmm4
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movaps %xmm3, %xmm2
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i32_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    movdqa %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i32_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i32_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i32_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i32> %A to <8 x i64>
  ret <8 x i64>%B
}
    915 
; Load of a <2 x i8> followed by zext to <2 x i64>. SSE2 does a scalar
; movzwl + movd then three unpacks; SSSE3 uses pshufb; SSE4.1/AVX fold the
; load directly into pmovzxbq.
define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movzwl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i8>, <2 x i8>* %ptr
 %Y = zext <2 x i8> %X to <2 x i64>
 ret <2 x i64> %Y
}
    948 
; Load of a <4 x i8> followed by zext to <4 x i32>. SSE2/SSSE3 use a movd
; load plus unpacks against zero; SSE4.1/AVX fold the load into pmovzxbd.
define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = zext <4 x i8> %X to <4 x i32>
 ret <4 x i32> %Y
}
    980 
; Load of a <4 x i8> followed by zext to <4 x i64>. SSSE3 splits into two
; pshufb results; SSE4.1/AVX1 use two folded pmovzxbq loads; AVX2/AVX512
; fold everything into one ymm vpmovzxbq from memory.
define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i8_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = zext <4 x i8> %X to <4 x i64>
 ret <4 x i64> %Y
}
   1028 
; Load of an <8 x i8> followed by zext to <8 x i16>. SSE2/SSSE3 use a movq
; load plus punpcklbw against zero; SSE4.1/AVX fold the load into pmovzxbw.
define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i16>
 ret <8 x i16> %Y
}
   1058 
; Zero-extending load of <8 x i8> to <8 x i32>: pre-SSE4.1 builds the result with
; two unpack stages; SSE4.1/AVX1 use two pmovzxbd loads; AVX2/AVX512 use one
; 256-bit vpmovzxbd folded with the load.
define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i32>
 ret <8 x i32> %Y
}
   1107 
; Loads a full <16 x i8>, shuffles out the low 8 bytes, then zero-extends them to
; <8 x i32> — checks the shuffle+zext pair is recognized and lowered like a plain
; zext (unpacks on SSE2/SSSE3, pmovzxbd on SSE4.1/AVX1, single ymm vpmovzxbd on
; AVX2/AVX512 with the load folded).
define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa (%rdi), %xmm1
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %Z = zext <8 x i8> %Y to <8 x i32>
 ret <8 x i32> %Z
}
   1161 
; Zero-extending load of <8 x i8> to <8 x i64> (four 128-bit result registers on
; SSE): SSE2 unpacks in three stages, SSSE3 uses pshufb masks (128 = zero lane),
; SSE4.1/AVX1 use four pmovzxbq loads, AVX2 two ymm vpmovzxbq, AVX512 one zmm.
define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSSE3-NEXT:    pshufb %xmm5, %xmm1
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i64>
 ret <8 x i64> %Y
}
   1227 
; Zero-extending load of a full <16 x i8> to <16 x i16>: SSE2/SSSE3 split via
; punpcklbw/punpckhbw with zero; SSE4.1/AVX1 use two pmovzxbw loads; AVX2/AVX512
; use a single 256-bit vpmovzxbw with the load folded.
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_16i16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_16i16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_16i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = zext <16 x i8> %X to <16 x i16>
 ret <16 x i16> %Y
}
   1274 
; Zero-extending load of <2 x i16> to <2 x i64>: SSE2/SSSE3 do movd plus two
; unpack-with-zero steps; SSE4.1/AVX fold the load into a single (v)pmovzxwq.
define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
; SSE2-LABEL: load_zext_2i16_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i16_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i16_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i16_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i16>, <2 x i16>* %ptr
 %Y = zext <2 x i16> %X to <2 x i64>
 ret <2 x i64> %Y
}
   1306 
; Zero-extending load of <4 x i16> to <4 x i32>: SSE2/SSSE3 use movq +
; punpcklwd with zero; SSE4.1/AVX fold the load into a single (v)pmovzxwd.
define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i16_to_4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_4i16_to_4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT:    retq
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = zext <4 x i16> %X to <4 x i32>
 ret <4 x i32> %Y
}
   1336 
; Zero-extending load of <4 x i16> to <4 x i64>: SSE2/SSSE3 unpack twice through
; i32; SSE4.1/AVX1 use two pmovzxwq loads; AVX2/AVX512 use one ymm vpmovzxwq.
define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_zext_4i16_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i16_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i16_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i16_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i16_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i16_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i16>, <4 x i16>* %ptr
 %Y = zext <4 x i16> %X to <4 x i64>
 ret <4 x i64> %Y
}
   1385 
; Zero-extending load of <8 x i16> to <8 x i32>: SSE2/SSSE3 split the vector via
; punpcklwd/punpckhwd with zero; SSE4.1/AVX1 use two pmovzxwd loads; AVX2/AVX512
; use one 256-bit vpmovzxwd with the load folded.
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i16_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i16_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i16_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i16>, <8 x i16>* %ptr
 %Y = zext <8 x i16> %X to <8 x i32>
 ret <8 x i32> %Y
}
   1432 
; Zero-extending load of <2 x i32> to <2 x i64>: SSE2/SSSE3 use movsd +
; unpcklps against a zeroed register; SSE4.1/AVX use a single (v)pmovzxdq.
define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_zext_2i32_to_2i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i32_to_2i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i32_to_2i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i32_to_2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i32>, <2 x i32>* %ptr
 %Y = zext <2 x i32> %X to <2 x i64>
 ret <2 x i64> %Y
}
   1462 
; Zero-extending load of <4 x i32> to <4 x i64>: SSE2/SSSE3 split via
; unpcklps/unpckhps with zero; SSE4.1/AVX1 use two pmovzxdq loads; AVX2/AVX512
; use one 256-bit vpmovzxdq with the load folded.
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm1
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps (%rdi), %xmm1
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i32_to_4i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i32_to_4i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i32_to_4i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i32>, <4 x i32>* %ptr
 %Y = zext <4 x i32> %X to <4 x i64>
 ret <4 x i64> %Y
}
   1509 
; In-register zext of <8 x i8> (held as the low bytes of <8 x i16> lanes) to
; <8 x i32>: every target first masks the high byte of each i16 lane with pand,
; then widens (punpck on SSE2/SSSE3, pmovzxwd on SSE4.1/AVX1, ymm vpmovzxwd on
; AVX2/AVX512).
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i8_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %t = zext <8 x i8> %z to <8 x i32>
  ret <8 x i32> %t
}
   1564 
; Interleaving shuffle of <8 x i16> with zeroinitializer, bitcast to <8 x i32> —
; checks the shuffle-with-zero pattern is recognized as a zero extension
; (unpacks with a zeroed register on SSE, single ymm vpmovzxwd on AVX2/AVX512).
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
   1612 
   1613 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; Zero-extension of every i32 lane of %A to i64, written as a shuffle that
; interleaves %A with zeroinitializer and bitcasts the <8 x i32> result.
; CHECK lines below are autogenerated by update_llc_test_checks.py —
; regenerate them with that script rather than editing by hand.
   1614 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
   1615 ; SSE2:       # %bb.0: # %entry
   1616 ; SSE2-NEXT:    movaps %xmm0, %xmm1
   1617 ; SSE2-NEXT:    xorps %xmm2, %xmm2
   1618 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1619 ; SSE2-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1620 ; SSE2-NEXT:    retq
   1621 ;
   1622 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
   1623 ; SSSE3:       # %bb.0: # %entry
   1624 ; SSSE3-NEXT:    movaps %xmm0, %xmm1
   1625 ; SSSE3-NEXT:    xorps %xmm2, %xmm2
   1626 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1627 ; SSSE3-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1628 ; SSSE3-NEXT:    retq
   1629 ;
   1630 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
   1631 ; SSE41:       # %bb.0: # %entry
   1632 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1633 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1634 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1635 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1636 ; SSE41-NEXT:    retq
   1637 ;
   1638 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
   1639 ; AVX1:       # %bb.0: # %entry
   1640 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1641 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1642 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1643 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1644 ; AVX1-NEXT:    retq
   1645 ;
   1646 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
   1647 ; AVX2:       # %bb.0: # %entry
   1648 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1649 ; AVX2-NEXT:    retq
   1650 ;
   1651 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
   1652 ; AVX512:       # %bb.0: # %entry
   1653 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1654 ; AVX512-NEXT:    retq
   1655 entry:
; Mask <0,4,1,4,2,4,3,4> pairs each source lane with lane 4 of the zero
; operand, so each i64 lane of %Z is the corresponding i32 zero-extended.
   1656   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
   1657   %Z = bitcast <8 x i32> %B to <4 x i64>
   1658   ret <4 x i64> %Z
   1659 }
   1660 
   1661 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; Zero-extension of all eight i8 lanes of %A to i32, expressed as a shuffle
; with zeroinitializer followed by a bitcast of the <32 x i8> result.
; CHECK lines are autogenerated (update_llc_test_checks.py); regenerate,
; do not hand-edit.
   1662 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
   1663 ; SSE2:       # %bb.0: # %entry
   1664 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1665 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
   1666 ; SSE2-NEXT:    packuswb %xmm1, %xmm1
   1667 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1668 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1669 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1670 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1671 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1672 ; SSE2-NEXT:    retq
   1673 ;
   1674 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
   1675 ; SSSE3:       # %bb.0: # %entry
   1676 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1677 ; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
   1678 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1679 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1680 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1681 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1682 ; SSSE3-NEXT:    retq
   1683 ;
   1684 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
   1685 ; SSE41:       # %bb.0: # %entry
   1686 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1687 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1688 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1689 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1690 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1691 ; SSE41-NEXT:    retq
   1692 ;
   1693 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
   1694 ; AVX1:       # %bb.0: # %entry
   1695 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1696 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1697 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1698 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1699 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1700 ; AVX1-NEXT:    retq
   1701 ;
   1702 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
   1703 ; AVX2:       # %bb.0: # %entry
   1704 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1705 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1706 ; AVX2-NEXT:    retq
   1707 ;
   1708 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
   1709 ; AVX512:       # %bb.0: # %entry
   1710 ; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1711 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1712 ; AVX512-NEXT:    retq
   1713 entry:
; Mask places each source byte followed by three zero bytes (lane 8 of the
; zero operand), i.e. each i32 lane of %Z equals the corresponding i8.
   1714   %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
   1715   %Z = bitcast <32 x i8> %B to <8 x i32>
   1716   ret <8 x i32> %Z
   1717 }
   1718 
   1719 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
; Zero-extension of bytes 6 and 7 of %A into the two i64 lanes, written as a
; shuffle with zeros starting at source element 6 (hence "offset6").
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1720 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1721 ; SSE2:       # %bb.0: # %entry
   1722 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1723 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1724 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1725 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1726 ; SSE2-NEXT:    retq
   1727 ;
   1728 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1729 ; SSSE3:       # %bb.0: # %entry
   1730 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   1731 ; SSSE3-NEXT:    retq
   1732 ;
   1733 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1734 ; SSE41:       # %bb.0: # %entry
   1735 ; SSE41-NEXT:    psrlq $48, %xmm0
   1736 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1737 ; SSE41-NEXT:    retq
   1738 ;
   1739 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1740 ; AVX:       # %bb.0: # %entry
   1741 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
   1742 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1743 ; AVX-NEXT:    retq
   1744 entry:
   1745   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   1746   %Z = bitcast <16 x i8> %B to <2 x i64>
   1747   ret <2 x i64> %Z
   1748 }
   1749 
   1750 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; Zero-extension of bytes 11..14 of %A into the four i64 lanes, written as a
; shuffle with zeros starting at source element 11 (hence "offset11").
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1751 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1752 ; SSE2:       # %bb.0: # %entry
   1753 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1754 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
   1755 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1756 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
   1757 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1758 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1759 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1760 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1761 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1762 ; SSE2-NEXT:    retq
   1763 ;
   1764 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1765 ; SSSE3:       # %bb.0: # %entry
   1766 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1767 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
   1768 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
   1769 ; SSSE3-NEXT:    retq
   1770 ;
   1771 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1772 ; SSE41:       # %bb.0: # %entry
   1773 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1774 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1775 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1776 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1777 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1778 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1779 ; SSE41-NEXT:    retq
   1780 ;
   1781 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1782 ; AVX1:       # %bb.0: # %entry
   1783 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1784 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1785 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1786 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1787 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1788 ; AVX1-NEXT:    retq
   1789 ;
   1790 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1791 ; AVX2:       # %bb.0: # %entry
   1792 ; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1793 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1794 ; AVX2-NEXT:    retq
   1795 ;
   1796 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1797 ; AVX512:       # %bb.0: # %entry
   1798 ; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1799 ; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1800 ; AVX512-NEXT:    retq
   1801 entry:
   1802   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   1803   %Z = bitcast <32 x i8> %B to <4 x i64>
   1804   ret <4 x i64> %Z
   1805 }
   1806 
   1807 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
; Zero-extension of i16 elements 3 and 4 of %A (byte offset 6, hence the
; name) into the two i64 lanes, written as a shuffle with zeros.
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1808 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1809 ; SSE2:       # %bb.0: # %entry
   1810 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1811 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1812 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1813 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1814 ; SSE2-NEXT:    retq
   1815 ;
   1816 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1817 ; SSSE3:       # %bb.0: # %entry
   1818 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
   1819 ; SSSE3-NEXT:    retq
   1820 ;
   1821 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1822 ; SSE41:       # %bb.0: # %entry
   1823 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1824 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1825 ; SSE41-NEXT:    retq
   1826 ;
   1827 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1828 ; AVX:       # %bb.0: # %entry
   1829 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1830 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1831 ; AVX-NEXT:    retq
   1832 entry:
   1833   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
   1834   %Z = bitcast <8 x i16> %B to <2 x i64>
   1835   ret <2 x i64> %Z
   1836 }
   1837 
   1838 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; Zero-extension of i16 elements 2..5 of %A into the four i64 lanes,
; written as a shuffle with zeros starting at source element 2.
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1839 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1840 ; SSE2:       # %bb.0: # %entry
   1841 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1842 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1843 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1844 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1845 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1846 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1847 ; SSE2-NEXT:    retq
   1848 ;
   1849 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1850 ; SSSE3:       # %bb.0: # %entry
   1851 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1852 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1853 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1854 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1855 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1856 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1857 ; SSSE3-NEXT:    retq
   1858 ;
   1859 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1860 ; SSE41:       # %bb.0: # %entry
   1861 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1862 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1863 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1864 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1865 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1866 ; SSE41-NEXT:    retq
   1867 ;
   1868 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1869 ; AVX1:       # %bb.0: # %entry
   1870 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1871 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1872 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1873 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1874 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1875 ; AVX1-NEXT:    retq
   1876 ;
   1877 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1878 ; AVX2:       # %bb.0: # %entry
   1879 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
   1880 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1881 ; AVX2-NEXT:    retq
   1882 ;
   1883 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1884 ; AVX512:       # %bb.0: # %entry
   1885 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
   1886 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1887 ; AVX512-NEXT:    retq
   1888 entry:
   1889   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
   1890   %Z = bitcast <16 x i16> %B to <4 x i64>
   1891   ret <4 x i64> %Z
   1892 }
   1893 
   1894 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; Zero-extension of i16 elements 1..4 of %A into the four i32 lanes, written
; as a shuffle with zeros starting at source element 1. Also exercises the
; fast-variable-shuffle attribute split (AVX2-SLOW vs AVX2-FAST prefixes).
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1895 ; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1896 ; SSE2:       # %bb.0: # %entry
   1897 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1898 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1899 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1900 ; SSE2-NEXT:    retq
   1901 ;
   1902 ; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1903 ; SSSE3:       # %bb.0: # %entry
   1904 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1905 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1906 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1907 ; SSSE3-NEXT:    retq
   1908 ;
   1909 ; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1910 ; SSE41:       # %bb.0: # %entry
   1911 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1912 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1913 ; SSE41-NEXT:    retq
   1914 ;
   1915 ; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1916 ; AVX1:       # %bb.0: # %entry
   1917 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1918 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1919 ; AVX1-NEXT:    retq
   1920 ;
   1921 ; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1922 ; AVX2-SLOW:       # %bb.0: # %entry
   1923 ; AVX2-SLOW-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1924 ; AVX2-SLOW-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1925 ; AVX2-SLOW-NEXT:    retq
   1926 ;
   1927 ; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1928 ; AVX2-FAST:       # %bb.0: # %entry
   1929 ; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
   1930 ; AVX2-FAST-NEXT:    retq
   1931 ;
   1932 ; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1933 ; AVX512F:       # %bb.0: # %entry
   1934 ; AVX512F-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1935 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1936 ; AVX512F-NEXT:    retq
   1937 ;
   1938 ; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1939 ; AVX512BW:       # %bb.0: # %entry
   1940 ; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero
   1941 ; AVX512BW-NEXT:    retq
   1942 entry:
   1943   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
   1944   %Z = bitcast <8 x i16> %B to <4 x i32>
   1945   ret <4 x i32> %Z
   1946 }
   1947 
   1948 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; Zero-extension of i16 elements 3..7 of %A into i32 lanes, with the upper
; source elements undef in the shuffle mask (starting at element 3).
; CHECK lines are autogenerated (update_llc_test_checks.py).
   1949 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1950 ; SSE2:       # %bb.0: # %entry
   1951 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1952 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1953 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1954 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1955 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1956 ; SSE2-NEXT:    retq
   1957 ;
   1958 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1959 ; SSSE3:       # %bb.0: # %entry
   1960 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1961 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1962 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1963 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1964 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1965 ; SSSE3-NEXT:    retq
   1966 ;
   1967 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1968 ; SSE41:       # %bb.0: # %entry
   1969 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1970 ; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1971 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1972 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1973 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1974 ; SSE41-NEXT:    retq
   1975 ;
   1976 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1977 ; AVX1:       # %bb.0: # %entry
   1978 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1979 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1980 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1981 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1982 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1983 ; AVX1-NEXT:    retq
   1984 ;
   1985 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1986 ; AVX2:       # %bb.0: # %entry
   1987 ; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1988 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1989 ; AVX2-NEXT:    retq
   1990 ;
   1991 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1992 ; AVX512:       # %bb.0: # %entry
   1993 ; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1994 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1995 ; AVX512-NEXT:    retq
   1996 entry:
   1997   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
   1998   %Z = bitcast <16 x i16> %B to <8 x i32>
   1999   ret <8 x i32> %Z
   2000 }
   2001 
   2002 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; Zero-extension of the upper half of a 256-bit input: i16 elements 8..15
; of %A (two of them undef in the mask) become the eight i32 lanes.
; CHECK lines are autogenerated (update_llc_test_checks.py).
   2003 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2004 ; SSE2:       # %bb.0: # %entry
   2005 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   2006 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   2007 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2008 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2009 ; SSE2-NEXT:    retq
   2010 ;
   2011 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2012 ; SSSE3:       # %bb.0: # %entry
   2013 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   2014 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   2015 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   2016 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2017 ; SSSE3-NEXT:    retq
   2018 ;
   2019 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2020 ; SSE41:       # %bb.0: # %entry
   2021 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   2022 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   2023 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
   2024 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
   2025 ; SSE41-NEXT:    movdqa %xmm2, %xmm1
   2026 ; SSE41-NEXT:    retq
   2027 ;
   2028 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2029 ; AVX1:       # %bb.0: # %entry
   2030 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2031 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
   2032 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   2033 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   2034 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2035 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2036 ; AVX1-NEXT:    retq
   2037 ;
   2038 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2039 ; AVX2:       # %bb.0: # %entry
   2040 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2041 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2042 ; AVX2-NEXT:    retq
   2043 ;
   2044 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
   2045 ; AVX512:       # %bb.0: # %entry
   2046 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2047 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2048 ; AVX512-NEXT:    retq
   2049 entry:
   2050   %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
   2051   %Z = bitcast <16 x i16> %B to <8 x i32>
   2052   ret <8 x i32> %Z
   2053 }
   2054 
   2055 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; Zero-extension of i32 elements 2 and 3 of %A into the two i64 lanes;
; expected to lower to a single unpckhps against a zero register.
; CHECK lines are autogenerated (update_llc_test_checks.py).
   2056 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
   2057 ; SSE:       # %bb.0: # %entry
   2058 ; SSE-NEXT:    xorps %xmm1, %xmm1
   2059 ; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2060 ; SSE-NEXT:    retq
   2061 ;
   2062 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
   2063 ; AVX:       # %bb.0: # %entry
   2064 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2065 ; AVX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2066 ; AVX-NEXT:    retq
   2067 entry:
   2068   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
   2069   %Z = bitcast <4 x i32> %B to <2 x i64>
   2070   ret <2 x i64> %Z
   2071 }
   2072 
   2073 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
; Zero-extension starting at i32 element 1, with the first and last result
; elements undef in the mask (only elements 2 and 3 are fully defined).
; CHECK lines are autogenerated (update_llc_test_checks.py).
   2074 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2075 ; SSE2:       # %bb.0: # %entry
   2076 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   2077 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   2078 ; SSE2-NEXT:    pand %xmm1, %xmm0
   2079 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2080 ; SSE2-NEXT:    retq
   2081 ;
   2082 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2083 ; SSSE3:       # %bb.0: # %entry
   2084 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   2085 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   2086 ; SSSE3-NEXT:    pand %xmm1, %xmm0
   2087 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2088 ; SSSE3-NEXT:    retq
   2089 ;
   2090 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2091 ; SSE41:       # %bb.0: # %entry
   2092 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   2093 ; SSE41-NEXT:    pxor %xmm0, %xmm0
   2094 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   2095 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2096 ; SSE41-NEXT:    retq
   2097 ;
   2098 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2099 ; AVX1:       # %bb.0: # %entry
   2100 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   2101 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
   2102 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   2103 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2104 ; AVX1-NEXT:    retq
   2105 ;
   2106 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2107 ; AVX2:       # %bb.0: # %entry
   2108 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
   2109 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2110 ; AVX2-NEXT:    retq
   2111 ;
   2112 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
   2113 ; AVX512:       # %bb.0: # %entry
   2114 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
   2115 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   2116 ; AVX512-NEXT:    retq
   2117 entry:
   2118   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
   2119   %Z = bitcast <8 x i32> %B to <4 x i64>
   2120   ret <4 x i64> %Z
   2121 }
   2122 
   ; Test: full zero-extension of 32 x i8 -> 32 x i32 (a 128-byte result).
   ; The return value does not fit in registers, so SSE targets return it
   ; indirectly via the sret pointer in %rdi (note the trailing
   ; "movq %rdi, %rax" in the SSE bodies).  SSE2/SSSE3 widen with
   ; punpcklbw/punpckhbw + punpcklwd/punpckhwd against a zero register;
   ; SSE4.1+ use pmovzxbd; AVX512 does the whole extend in two zmm
   ; vpmovzxbd ops.  CHECK lines below are autogenerated — do not edit
   ; by hand; regenerate with utils/update_llc_test_checks.py.
   2123 define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) {
   2124 ; SSE2-LABEL: zext_32i8_to_32i32:
   2125 ; SSE2:       # %bb.0:
   2126 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   2127 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
   2128 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2129 ; SSE2-NEXT:    movdqa %xmm3, %xmm8
   2130 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
   2131 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2132 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
   2133 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
   2134 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
   2135 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2136 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
   2137 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
   2138 ; SSE2-NEXT:    movdqa %xmm6, %xmm7
   2139 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
   2140 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
   2141 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
   2142 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
   2143 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
   2144 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2145 ; SSE2-NEXT:    movdqa %xmm1, 112(%rdi)
   2146 ; SSE2-NEXT:    movdqa %xmm4, 96(%rdi)
   2147 ; SSE2-NEXT:    movdqa %xmm6, 80(%rdi)
   2148 ; SSE2-NEXT:    movdqa %xmm7, 64(%rdi)
   2149 ; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
   2150 ; SSE2-NEXT:    movdqa %xmm5, 32(%rdi)
   2151 ; SSE2-NEXT:    movdqa %xmm3, 16(%rdi)
   2152 ; SSE2-NEXT:    movdqa %xmm8, (%rdi)
   2153 ; SSE2-NEXT:    movq %rdi, %rax
   2154 ; SSE2-NEXT:    retq
   2155 ;
   2156 ; SSSE3-LABEL: zext_32i8_to_32i32:
   2157 ; SSSE3:       # %bb.0:
   2158 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   2159 ; SSSE3-NEXT:    movdqa %xmm0, %xmm3
   2160 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2161 ; SSSE3-NEXT:    movdqa %xmm3, %xmm8
   2162 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3]
   2163 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
   2164 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
   2165 ; SSSE3-NEXT:    movdqa %xmm0, %xmm5
   2166 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
   2167 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   2168 ; SSSE3-NEXT:    movdqa %xmm1, %xmm6
   2169 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
   2170 ; SSSE3-NEXT:    movdqa %xmm6, %xmm7
   2171 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3]
   2172 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
   2173 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
   2174 ; SSSE3-NEXT:    movdqa %xmm1, %xmm4
   2175 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
   2176 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   2177 ; SSSE3-NEXT:    movdqa %xmm1, 112(%rdi)
   2178 ; SSSE3-NEXT:    movdqa %xmm4, 96(%rdi)
   2179 ; SSSE3-NEXT:    movdqa %xmm6, 80(%rdi)
   2180 ; SSSE3-NEXT:    movdqa %xmm7, 64(%rdi)
   2181 ; SSSE3-NEXT:    movdqa %xmm0, 48(%rdi)
   2182 ; SSSE3-NEXT:    movdqa %xmm5, 32(%rdi)
   2183 ; SSSE3-NEXT:    movdqa %xmm3, 16(%rdi)
   2184 ; SSSE3-NEXT:    movdqa %xmm8, (%rdi)
   2185 ; SSSE3-NEXT:    movq %rdi, %rax
   2186 ; SSSE3-NEXT:    retq
   2187 ;
   2188 ; SSE41-LABEL: zext_32i8_to_32i32:
   2189 ; SSE41:       # %bb.0:
   2190 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   2191 ; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
   2192 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
   2193 ; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
   2194 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
   2195 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
   2196 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   2197 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
   2198 ; SSE41-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
   2199 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
   2200 ; SSE41-NEXT:    pshufd {{.*#+}} xmm7 = xmm1[2,3,0,1]
   2201 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
   2202 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
   2203 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
   2204 ; SSE41-NEXT:    movdqa %xmm1, 112(%rdi)
   2205 ; SSE41-NEXT:    movdqa %xmm7, 96(%rdi)
   2206 ; SSE41-NEXT:    movdqa %xmm6, 80(%rdi)
   2207 ; SSE41-NEXT:    movdqa %xmm5, 64(%rdi)
   2208 ; SSE41-NEXT:    movdqa %xmm0, 48(%rdi)
   2209 ; SSE41-NEXT:    movdqa %xmm4, 32(%rdi)
   2210 ; SSE41-NEXT:    movdqa %xmm3, 16(%rdi)
   2211 ; SSE41-NEXT:    movdqa %xmm2, (%rdi)
   2212 ; SSE41-NEXT:    movq %rdi, %rax
   2213 ; SSE41-NEXT:    retq
   2214 ;
   2215 ; AVX1-LABEL: zext_32i8_to_32i32:
   2216 ; AVX1:       # %bb.0:
   2217 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   2218 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
   2219 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
   2220 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
   2221 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   2222 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
   2223 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
   2224 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
   2225 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
   2226 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   2227 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
   2228 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
   2229 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   2230 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
   2231 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
   2232 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   2233 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
   2234 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
   2235 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
   2236 ; AVX1-NEXT:    vmovaps %ymm4, %ymm0
   2237 ; AVX1-NEXT:    retq
   2238 ;
   2239 ; AVX2-LABEL: zext_32i8_to_32i32:
   2240 ; AVX2:       # %bb.0:
   2241 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   2242 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
   2243 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
   2244 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[3,1,2,3]
   2245 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
   2246 ; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
   2247 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm3
   2248 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   2249 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
   2250 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   2251 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   2252 ; AVX2-NEXT:    vmovdqa %ymm4, %ymm0
   2253 ; AVX2-NEXT:    retq
   2254 ;
   2255 ; AVX512-LABEL: zext_32i8_to_32i32:
   2256 ; AVX512:       # %bb.0:
   2257 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   2258 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2259 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   2260 ; AVX512-NEXT:    vmovdqa64 %zmm2, %zmm0
   2261 ; AVX512-NEXT:    retq
   2262   %res = zext <32 x i8>%x to <32 x i32>
   2263   ret <32 x i32> %res
   2264 }
   2265 
   ; Test: load of <2 x i8> from memory zero-extended to <2 x i32>.
   ; The add (%y + %y) keeps the extended value live so the zext cannot
   ; fold away entirely.  Pre-SSE4.1 targets load the two bytes as a
   ; single 16-bit scalar (movzwl) and widen in vector registers;
   ; SSE4.1/AVX fold the load directly into pmovzxbq.  CHECK lines are
   ; autogenerated — regenerate with utils/update_llc_test_checks.py
   ; rather than editing by hand.
   2266 define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
   2267 ; SSE2-LABEL: zext_2i8_to_2i32:
   2268 ; SSE2:       # %bb.0:
   2269 ; SSE2-NEXT:    movzwl (%rdi), %eax
   2270 ; SSE2-NEXT:    movd %eax, %xmm0
   2271 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   2272 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   2273 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2274 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
   2275 ; SSE2-NEXT:    paddq %xmm0, %xmm0
   2276 ; SSE2-NEXT:    retq
   2277 ;
   2278 ; SSSE3-LABEL: zext_2i8_to_2i32:
   2279 ; SSSE3:       # %bb.0:
   2280 ; SSSE3-NEXT:    movzwl (%rdi), %eax
   2281 ; SSSE3-NEXT:    movd %eax, %xmm0
   2282 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[3],zero,zero,zero
   2283 ; SSSE3-NEXT:    paddq %xmm0, %xmm0
   2284 ; SSSE3-NEXT:    retq
   2285 ;
   2286 ; SSE41-LABEL: zext_2i8_to_2i32:
   2287 ; SSE41:       # %bb.0:
   2288 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
   2289 ; SSE41-NEXT:    paddq %xmm0, %xmm0
   2290 ; SSE41-NEXT:    retq
   2291 ;
   2292 ; AVX-LABEL: zext_2i8_to_2i32:
   2293 ; AVX:       # %bb.0:
   2294 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
   2295 ; AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
   2296 ; AVX-NEXT:    retq
   2297   %x = load <2 x i8>, <2 x i8>* %addr, align 1
   2298   %y = zext <2 x i8> %x to <2 x i32>
   2299   %z = add <2 x i32>%y, %y
   2300   ret <2 x i32>%z
   2301 }
   2302