Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      7 
      8 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 8 bytes of %A (i8 -> i16) into a 128-bit result.
        ; The SSE2 and SSSE3 checks expect an unpack against a zeroed register
        ; (pxor + punpcklbw); the SSE4.1 and AVX checks expect a single
        ; pmovzxbw / vpmovzxbw.
      9 ; SSE2-LABEL: zext_16i8_to_8i16:
     10 ; SSE2:       # BB#0: # %entry
     11 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     12 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     13 ; SSE2-NEXT:    retq
     14 ;
     15 ; SSSE3-LABEL: zext_16i8_to_8i16:
     16 ; SSSE3:       # BB#0: # %entry
     17 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     18 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     19 ; SSSE3-NEXT:    retq
     20 ;
     21 ; SSE41-LABEL: zext_16i8_to_8i16:
     22 ; SSE41:       # BB#0: # %entry
     23 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     24 ; SSE41-NEXT:    retq
     25 ;
     26 ; AVX-LABEL: zext_16i8_to_8i16:
     27 ; AVX:       # BB#0: # %entry
     28 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     29 ; AVX-NEXT:    retq
     30 entry:
        ; Keep only lanes 0-7 of %A, then widen each lane with zext.
     31   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     32   %C = zext <8 x i8> %B to <8 x i16>
     33   ret <8 x i16> %C
     34 }
     35 
     36 ; PR17654
     37 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
        ; Zero-extends all 16 bytes of %A (i8 -> i16), producing a 256-bit result.
        ; The SSE-level checks expect the result split across two registers
        ; (low half in xmm0, high half in xmm1). The AVX1 checks expect the two
        ; halves to be combined into ymm0 with vinsertf128, and the AVX2 checks
        ; expect a single vpmovzxbw writing ymm0.
     38 ; SSE2-LABEL: zext_16i8_to_16i16:
     39 ; SSE2:       # BB#0: # %entry
     40 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
     41 ; SSE2-NEXT:    pxor %xmm2, %xmm2
     42 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
     43 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     44 ; SSE2-NEXT:    retq
     45 ;
     46 ; SSSE3-LABEL: zext_16i8_to_16i16:
     47 ; SSSE3:       # BB#0: # %entry
     48 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
     49 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
     50 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
     51 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     52 ; SSSE3-NEXT:    retq
     53 ;
     54 ; SSE41-LABEL: zext_16i8_to_16i16:
     55 ; SSE41:       # BB#0: # %entry
     56 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
     57 ; SSE41-NEXT:    pxor %xmm2, %xmm2
     58 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
     59 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     60 ; SSE41-NEXT:    retq
     61 ;
     62 ; AVX1-LABEL: zext_16i8_to_16i16:
     63 ; AVX1:       # BB#0: # %entry
     64 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     65 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
     66 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     67 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     68 ; AVX1-NEXT:    retq
     69 ;
     70 ; AVX2-LABEL: zext_16i8_to_16i16:
     71 ; AVX2:       # BB#0: # %entry
     72 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     73 ; AVX2-NEXT:    retq
     74 entry:
     75   %B = zext <16 x i8> %A to <16 x i16>
     76   ret <16 x i16> %B
     77 }
     78 
     79 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 4 bytes of %A (i8 -> i32) into a 128-bit result.
        ; The SSE2 and SSSE3 checks expect two unpacks against a zeroed register
        ; (punpcklbw then punpcklwd); the SSE4.1 and AVX checks expect a single
        ; pmovzxbd / vpmovzxbd.
     80 ; SSE2-LABEL: zext_16i8_to_4i32:
     81 ; SSE2:       # BB#0: # %entry
     82 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     83 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     84 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     85 ; SSE2-NEXT:    retq
     86 ;
     87 ; SSSE3-LABEL: zext_16i8_to_4i32:
     88 ; SSSE3:       # BB#0: # %entry
     89 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     90 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     91 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     92 ; SSSE3-NEXT:    retq
     93 ;
     94 ; SSE41-LABEL: zext_16i8_to_4i32:
     95 ; SSE41:       # BB#0: # %entry
     96 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
     97 ; SSE41-NEXT:    retq
     98 ;
     99 ; AVX-LABEL: zext_16i8_to_4i32:
    100 ; AVX:       # BB#0: # %entry
    101 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    102 ; AVX-NEXT:    retq
    103 entry:
        ; Keep only lanes 0-3 of %A, then widen each lane with zext.
    104   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    105   %C = zext <4 x i8> %B to <4 x i32>
    106   ret <4 x i32> %C
    107 }
    108 
    109 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 8 bytes of %A (i8 -> i32), producing a 256-bit result.
        ; The SSE-level checks expect the result split across xmm0 (elements 0-3)
        ; and xmm1 (elements 4-7); the AVX1 checks combine both halves into ymm0
        ; with vinsertf128, and the AVX2 checks expect one vpmovzxbd writing ymm0.
        ; NOTE(review): the AVX1 and AVX2 checks also contain a vpand applied to
        ; the output of a pmovzx-style extension, whose upper bytes are already
        ; zero. That looks redundant and is presumably a codegen artifact
        ; captured by the autogenerated assertions - confirm before relying on it.
    110 ; SSE2-LABEL: zext_16i8_to_8i32:
    111 ; SSE2:       # BB#0: # %entry
    112 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    113 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    114 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    115 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    116 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    117 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    118 ; SSE2-NEXT:    retq
    119 ;
    120 ; SSSE3-LABEL: zext_16i8_to_8i32:
    121 ; SSSE3:       # BB#0: # %entry
    122 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    123 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    124 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    125 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    126 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    127 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    128 ; SSSE3-NEXT:    retq
    129 ;
    130 ; SSE41-LABEL: zext_16i8_to_8i32:
    131 ; SSE41:       # BB#0: # %entry
    132 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    133 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    134 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    135 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    136 ; SSE41-NEXT:    retq
    137 ;
    138 ; AVX1-LABEL: zext_16i8_to_8i32:
    139 ; AVX1:       # BB#0: # %entry
    140 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    141 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    142 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
    143 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
    144 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    145 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
    146 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    147 ; AVX1-NEXT:    retq
    148 ;
    149 ; AVX2-LABEL: zext_16i8_to_8i32:
    150 ; AVX2:       # BB#0: # %entry
    151 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    152 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
    153 ; AVX2-NEXT:    retq
    154 entry:
        ; Keep only lanes 0-7 of %A, then widen each lane with zext.
    155   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    156   %C = zext <8 x i8> %B to <8 x i32>
    157   ret <8 x i32> %C
    158 }
    159 
    160 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 2 bytes of %A (i8 -> i64) into a 128-bit result.
        ; The SSE2 checks expect three successive unpacks against a zeroed
        ; register, the SSSE3 checks expect a single pshufb, and the SSE4.1 and
        ; AVX checks expect a single pmovzxbq / vpmovzxbq.
    161 ; SSE2-LABEL: zext_16i8_to_2i64:
    162 ; SSE2:       # BB#0: # %entry
    163 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    164 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    165 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    166 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    167 ; SSE2-NEXT:    retq
    168 ;
    169 ; SSSE3-LABEL: zext_16i8_to_2i64:
    170 ; SSSE3:       # BB#0: # %entry
    171 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    172 ; SSSE3-NEXT:    retq
    173 ;
    174 ; SSE41-LABEL: zext_16i8_to_2i64:
    175 ; SSE41:       # BB#0: # %entry
    176 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    177 ; SSE41-NEXT:    retq
    178 ;
    179 ; AVX-LABEL: zext_16i8_to_2i64:
    180 ; AVX:       # BB#0: # %entry
    181 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    182 ; AVX-NEXT:    retq
    183 entry:
        ; Keep only lanes 0-1 of %A, then widen each lane with zext.
    184   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
    185   %C = zext <2 x i8> %B to <2 x i64>
    186   ret <2 x i64> %C
    187 }
    188 
    189 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 4 bytes of %A (i8 -> i64), producing a 256-bit result.
        ; The SSE-level checks expect the result split across xmm0 (elements 0-1)
        ; and xmm1 (elements 2-3); the AVX1 checks combine both halves into ymm0
        ; with vinsertf128, and the AVX2 checks expect one vpmovzxbq writing ymm0.
        ; NOTE(review): the AVX1 and AVX2 checks also contain a vpand applied to
        ; the output of a pmovzx-style extension, whose upper bytes are already
        ; zero. That looks redundant and is presumably a codegen artifact
        ; captured by the autogenerated assertions - confirm before relying on it.
    190 ; SSE2-LABEL: zext_16i8_to_4i64:
    191 ; SSE2:       # BB#0: # %entry
    192 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    193 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    194 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    195 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    196 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    197 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    198 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    199 ; SSE2-NEXT:    retq
    200 ;
    201 ; SSSE3-LABEL: zext_16i8_to_4i64:
    202 ; SSSE3:       # BB#0: # %entry
    203 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    204 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    205 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
    206 ; SSSE3-NEXT:    retq
    207 ;
    208 ; SSE41-LABEL: zext_16i8_to_4i64:
    209 ; SSE41:       # BB#0: # %entry
    210 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    211 ; SSE41-NEXT:    psrld $16, %xmm0
    212 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    213 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    214 ; SSE41-NEXT:    retq
    215 ;
    216 ; AVX1-LABEL: zext_16i8_to_4i64:
    217 ; AVX1:       # BB#0: # %entry
    218 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    219 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
    220 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
    221 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
    222 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    223 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
    224 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    225 ; AVX1-NEXT:    retq
    226 ;
    227 ; AVX2-LABEL: zext_16i8_to_4i64:
    228 ; AVX2:       # BB#0: # %entry
    229 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
    230 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
    231 ; AVX2-NEXT:    retq
    232 entry:
        ; Keep only lanes 0-3 of %A, then widen each lane with zext.
    233   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    234   %C = zext <4 x i8> %B to <4 x i64>
    235   ret <4 x i64> %C
    236 }
    237 
    238 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 4 words of %A (i16 -> i32) into a 128-bit result.
        ; The SSE2 and SSSE3 checks expect an unpack against a zeroed register
        ; (pxor + punpcklwd); the SSE4.1 and AVX checks expect a single
        ; pmovzxwd / vpmovzxwd.
    239 ; SSE2-LABEL: zext_8i16_to_4i32:
    240 ; SSE2:       # BB#0: # %entry
    241 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    242 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    243 ; SSE2-NEXT:    retq
    244 ;
    245 ; SSSE3-LABEL: zext_8i16_to_4i32:
    246 ; SSSE3:       # BB#0: # %entry
    247 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    248 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    249 ; SSSE3-NEXT:    retq
    250 ;
    251 ; SSE41-LABEL: zext_8i16_to_4i32:
    252 ; SSE41:       # BB#0: # %entry
    253 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    254 ; SSE41-NEXT:    retq
    255 ;
    256 ; AVX-LABEL: zext_8i16_to_4i32:
    257 ; AVX:       # BB#0: # %entry
    258 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    259 ; AVX-NEXT:    retq
    260 entry:
        ; Keep only lanes 0-3 of %A, then widen each lane with zext.
    261   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    262   %C = zext <4 x i16> %B to <4 x i32>
    263   ret <4 x i32> %C
    264 }
    265 
    266 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends all 8 words of %A (i16 -> i32), producing a 256-bit result.
        ; The SSE-level checks expect the result split across xmm0 (low half)
        ; and xmm1 (high half); the AVX1 checks combine both halves into ymm0
        ; with vinsertf128, and the AVX2 checks expect one vpmovzxwd writing ymm0.
    267 ; SSE2-LABEL: zext_8i16_to_8i32:
    268 ; SSE2:       # BB#0: # %entry
    269 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    270 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    271 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    272 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    273 ; SSE2-NEXT:    retq
    274 ;
    275 ; SSSE3-LABEL: zext_8i16_to_8i32:
    276 ; SSSE3:       # BB#0: # %entry
    277 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    278 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    279 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    280 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    281 ; SSSE3-NEXT:    retq
    282 ;
    283 ; SSE41-LABEL: zext_8i16_to_8i32:
    284 ; SSE41:       # BB#0: # %entry
    285 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    286 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    287 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    288 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    289 ; SSE41-NEXT:    retq
    290 ;
    291 ; AVX1-LABEL: zext_8i16_to_8i32:
    292 ; AVX1:       # BB#0: # %entry
    293 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    294 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    295 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    296 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    297 ; AVX1-NEXT:    retq
    298 ;
    299 ; AVX2-LABEL: zext_8i16_to_8i32:
    300 ; AVX2:       # BB#0: # %entry
    301 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    302 ; AVX2-NEXT:    retq
    303 entry:
    304   %B = zext <8 x i16> %A to <8 x i32>
    305   ret <8 x i32>%B
    306 }
    307 
    308 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 2 words of %A (i16 -> i64) into a 128-bit result.
        ; The SSE2 and SSSE3 checks expect two unpacks against a zeroed register
        ; (punpcklwd then punpckldq); the SSE4.1 and AVX checks expect a single
        ; pmovzxwq / vpmovzxwq.
    309 ; SSE2-LABEL: zext_8i16_to_2i64:
    310 ; SSE2:       # BB#0: # %entry
    311 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    312 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    313 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    314 ; SSE2-NEXT:    retq
    315 ;
    316 ; SSSE3-LABEL: zext_8i16_to_2i64:
    317 ; SSSE3:       # BB#0: # %entry
    318 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    319 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    320 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    321 ; SSSE3-NEXT:    retq
    322 ;
    323 ; SSE41-LABEL: zext_8i16_to_2i64:
    324 ; SSE41:       # BB#0: # %entry
    325 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    326 ; SSE41-NEXT:    retq
    327 ;
    328 ; AVX-LABEL: zext_8i16_to_2i64:
    329 ; AVX:       # BB#0: # %entry
    330 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    331 ; AVX-NEXT:    retq
    332 entry:
        ; Keep only lanes 0-1 of %A, then widen each lane with zext.
    333   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
    334   %C = zext <2 x i16> %B to <2 x i64>
    335   ret <2 x i64> %C
    336 }
    337 
    338 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 4 words of %A (i16 -> i64), producing a 256-bit result.
        ; The SSE-level checks expect the result split across xmm0 (elements 0-1)
        ; and xmm1 (elements 2-3); the AVX1 checks combine both halves into ymm0
        ; with vinsertf128, and the AVX2 checks expect one vpmovzxwq writing ymm0.
        ; NOTE(review): the AVX1 and AVX2 checks also contain a vpblendw against
        ; a zeroed register applied to the output of a pmovzx-style extension,
        ; whose upper words are already zero. That looks redundant and is
        ; presumably a codegen artifact captured by the autogenerated assertions
        ; - confirm before relying on it.
    339 ; SSE2-LABEL: zext_8i16_to_4i64:
    340 ; SSE2:       # BB#0: # %entry
    341 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    342 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    343 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    344 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    345 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    346 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    347 ; SSE2-NEXT:    retq
    348 ;
    349 ; SSSE3-LABEL: zext_8i16_to_4i64:
    350 ; SSSE3:       # BB#0: # %entry
    351 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    352 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    353 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    354 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    355 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    356 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    357 ; SSSE3-NEXT:    retq
    358 ;
    359 ; SSE41-LABEL: zext_8i16_to_4i64:
    360 ; SSE41:       # BB#0: # %entry
    361 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    362 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    363 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    364 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    365 ; SSE41-NEXT:    retq
    366 ;
    367 ; AVX1-LABEL: zext_8i16_to_4i64:
    368 ; AVX1:       # BB#0: # %entry
    369 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    370 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
    371 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    372 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
    373 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    374 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
    375 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    376 ; AVX1-NEXT:    retq
    377 ;
    378 ; AVX2-LABEL: zext_8i16_to_4i64:
    379 ; AVX2:       # BB#0: # %entry
    380 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    381 ; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    382 ; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
    383 ; AVX2-NEXT:    retq
    384 entry:
        ; Keep only lanes 0-3 of %A, then widen each lane with zext.
    385   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    386   %C = zext <4 x i16> %B to <4 x i64>
    387   ret <4 x i64> %C
    388 }
    389 
    390 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low 2 dwords of %A (i32 -> i64) into a 128-bit result.
        ; The SSE2 and SSSE3 checks expect an unpack against a zeroed register
        ; (pxor + punpckldq); the SSE4.1 and AVX checks expect a single
        ; pmovzxdq / vpmovzxdq.
    391 ; SSE2-LABEL: zext_4i32_to_2i64:
    392 ; SSE2:       # BB#0: # %entry
    393 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    394 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    395 ; SSE2-NEXT:    retq
    396 ;
    397 ; SSSE3-LABEL: zext_4i32_to_2i64:
    398 ; SSSE3:       # BB#0: # %entry
    399 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    400 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    401 ; SSSE3-NEXT:    retq
    402 ;
    403 ; SSE41-LABEL: zext_4i32_to_2i64:
    404 ; SSE41:       # BB#0: # %entry
    405 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    406 ; SSE41-NEXT:    retq
    407 ;
    408 ; AVX-LABEL: zext_4i32_to_2i64:
    409 ; AVX:       # BB#0: # %entry
    410 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    411 ; AVX-NEXT:    retq
    412 entry:
        ; Keep only lanes 0-1 of %A, then widen each lane with zext.
    413   %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    414   %C = zext <2 x i32> %B to <2 x i64>
    415   ret <2 x i64> %C
    416 }
    417 
    418 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extends all 4 dwords of %A (i32 -> i64), producing a 256-bit result.
        ; The SSE-level checks expect the result split across xmm0 (low half)
        ; and xmm1 (high half); the AVX1 checks combine both halves into ymm0
        ; with vinsertf128, and the AVX2 checks expect one vpmovzxdq writing ymm0.
    419 ; SSE2-LABEL: zext_4i32_to_4i64:
    420 ; SSE2:       # BB#0: # %entry
    421 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    422 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    423 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    424 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    425 ; SSE2-NEXT:    retq
    426 ;
    427 ; SSSE3-LABEL: zext_4i32_to_4i64:
    428 ; SSSE3:       # BB#0: # %entry
    429 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    430 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    431 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    432 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    433 ; SSSE3-NEXT:    retq
    434 ;
    435 ; SSE41-LABEL: zext_4i32_to_4i64:
    436 ; SSE41:       # BB#0: # %entry
    437 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    438 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    439 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
    440 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    441 ; SSE41-NEXT:    retq
    442 ;
    443 ; AVX1-LABEL: zext_4i32_to_4i64:
    444 ; AVX1:       # BB#0: # %entry
    445 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    446 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    447 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    448 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    449 ; AVX1-NEXT:    retq
    450 ;
    451 ; AVX2-LABEL: zext_4i32_to_4i64:
    452 ; AVX2:       # BB#0: # %entry
    453 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    454 ; AVX2-NEXT:    retq
    455 entry:
    456   %B = zext <4 x i32> %A to <4 x i64>
    457   ret <4 x i64>%B
    458 }
    459 
    460 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
        ; Loads a <2 x i8> vector from %ptr and zero-extends it (i8 -> i64).
        ; The SSE2 and SSSE3 checks expect the two bytes to be loaded through a
        ; scalar register (movzwl + movd) before being widened; the SSE4.1 and
        ; AVX checks expect the load to be folded into pmovzxbq / vpmovzxbq as a
        ; memory operand.
    461 ; SSE2-LABEL: load_zext_2i8_to_2i64:
    462 ; SSE2:       # BB#0: # %entry
    463 ; SSE2-NEXT:    movzwl (%rdi), %eax
    464 ; SSE2-NEXT:    movd %eax, %xmm0
    465 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    466 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    467 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    468 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    469 ; SSE2-NEXT:    retq
    470 ;
    471 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
    472 ; SSSE3:       # BB#0: # %entry
    473 ; SSSE3-NEXT:    movzwl (%rdi), %eax
    474 ; SSSE3-NEXT:    movd %eax, %xmm0
    475 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    476 ; SSSE3-NEXT:    retq
    477 ;
    478 ; SSE41-LABEL: load_zext_2i8_to_2i64:
    479 ; SSE41:       # BB#0: # %entry
    480 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    481 ; SSE41-NEXT:    retq
    482 ;
    483 ; AVX-LABEL: load_zext_2i8_to_2i64:
    484 ; AVX:       # BB#0: # %entry
    485 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    486 ; AVX-NEXT:    retq
    487 entry:
    488  %X = load <2 x i8>, <2 x i8>* %ptr
    489  %Y = zext <2 x i8> %X to <2 x i64>
    490  ret <2 x i64> %Y
    491 }
    492 
    493 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
        ; Loads a <4 x i8> vector from %ptr and zero-extends it (i8 -> i32).
        ; The SSE2 and SSSE3 checks expect a movd load followed by two unpacks
        ; against a zeroed register; the SSE4.1 and AVX checks expect the load to
        ; be folded into pmovzxbd / vpmovzxbd as a memory operand.
    494 ; SSE2-LABEL: load_zext_4i8_to_4i32:
    495 ; SSE2:       # BB#0: # %entry
    496 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    497 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    498 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    499 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    500 ; SSE2-NEXT:    retq
    501 ;
    502 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
    503 ; SSSE3:       # BB#0: # %entry
    504 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    505 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    506 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    507 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    508 ; SSSE3-NEXT:    retq
    509 ;
    510 ; SSE41-LABEL: load_zext_4i8_to_4i32:
    511 ; SSE41:       # BB#0: # %entry
    512 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    513 ; SSE41-NEXT:    retq
    514 ;
    515 ; AVX-LABEL: load_zext_4i8_to_4i32:
    516 ; AVX:       # BB#0: # %entry
    517 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    518 ; AVX-NEXT:    retq
    519 entry:
    520  %X = load <4 x i8>, <4 x i8>* %ptr
    521  %Y = zext <4 x i8> %X to <4 x i32>
    522  ret <4 x i32> %Y
    523 }
    524 
    525 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
        ; Loads a <4 x i8> vector from %ptr and zero-extends it (i8 -> i64),
        ; producing a 256-bit result. The SSE-level checks expect the result
        ; split across xmm0 and xmm1; the AVX1 checks combine two vpmovzxbq
        ; results into ymm0 with vinsertf128, and the AVX2 checks expect one
        ; vpmovzxbq from memory writing ymm0.
        ; NOTE(review): in the SSE2 and SSSE3 checks the punpcklbw / punpcklwd
        ; interleave against xmm0, which no earlier checked instruction zeroes;
        ; the following pand (SSE2) or pshufb (SSSE3) is what discards those
        ; lanes. Presumably intentional any-extend + mask codegen - confirm.
    526 ; SSE2-LABEL: load_zext_4i8_to_4i64:
    527 ; SSE2:       # BB#0: # %entry
    528 ; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    529 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    530 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    531 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
    532 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
    533 ; SSE2-NEXT:    pand %xmm2, %xmm0
    534 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
    535 ; SSE2-NEXT:    pand %xmm2, %xmm1
    536 ; SSE2-NEXT:    retq
    537 ;
    538 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
    539 ; SSSE3:       # BB#0: # %entry
    540 ; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    541 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    542 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    543 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    544 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero
    545 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero
    546 ; SSSE3-NEXT:    retq
    547 ;
    548 ; SSE41-LABEL: load_zext_4i8_to_4i64:
    549 ; SSE41:       # BB#0: # %entry
    550 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    551 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    552 ; SSE41-NEXT:    retq
    553 ;
    554 ; AVX1-LABEL: load_zext_4i8_to_4i64:
    555 ; AVX1:       # BB#0: # %entry
    556 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    557 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    558 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    559 ; AVX1-NEXT:    retq
    560 ;
    561 ; AVX2-LABEL: load_zext_4i8_to_4i64:
    562 ; AVX2:       # BB#0: # %entry
    563 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    564 ; AVX2-NEXT:    retq
    565 entry:
    566  %X = load <4 x i8>, <4 x i8>* %ptr
    567  %Y = zext <4 x i8> %X to <4 x i64>
    568  ret <4 x i64> %Y
    569 }
    570 
    571 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
        ; Test: load <8 x i8> from %ptr and zero-extend every lane to i16.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load 64 bits with movq, zero xmm1 with pxor, and
        ;    interleave the loaded bytes with the zero bytes (punpcklbw).
        ;  - SSE4.1 and the AVX targets fold the load into one pmovzxbw /
        ;    vpmovzxbw with a memory operand.
    572 ; SSE2-LABEL: load_zext_8i8_to_8i16:
    573 ; SSE2:       # BB#0: # %entry
    574 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    575 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    576 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    577 ; SSE2-NEXT:    retq
    578 ;
    579 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
    580 ; SSSE3:       # BB#0: # %entry
    581 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    582 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    583 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    584 ; SSSE3-NEXT:    retq
    585 ;
    586 ; SSE41-LABEL: load_zext_8i8_to_8i16:
    587 ; SSE41:       # BB#0: # %entry
    588 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    589 ; SSE41-NEXT:    retq
    590 ;
    591 ; AVX-LABEL: load_zext_8i8_to_8i16:
    592 ; AVX:       # BB#0: # %entry
    593 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    594 ; AVX-NEXT:    retq
    595 entry:
    596  %X = load <8 x i8>, <8 x i8>* %ptr
    597  %Y = zext <8 x i8> %X to <8 x i16>
    598  ret <8 x i16> %Y
    599 }
    600 
    601 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
        ; Test: load <8 x i8> from %ptr and zero-extend every lane to i32.
        ; The 256-bit result comes back in xmm0 (low half) and xmm1 (high half)
        ; on the SSE targets and in ymm0 on the AVX targets.
        ; Expected lowering according to the checks below:
        ;  - SSE2 unpacks bytes and words, then clears the upper bytes of each
        ;    dword with a pand against a [255,0,0,0,...] mask.
        ;  - SSSE3 uses two pshufb shuffles whose masks zero the upper bytes.
        ;  - SSE4.1 uses two pmovzxbd loads, AVX1 two vpmovzxbd loads joined with
        ;    vinsertf128, and AVX2 a single vpmovzxbd into ymm0.
        ; NOTE(review): on SSE2 and SSSE3 the first punpcklbw interleaves with
        ; xmm0, which nothing in this function has written; the later pand /
        ; zeroing pshufb masks appear to discard those bytes.
    602 ; SSE2-LABEL: load_zext_8i8_to_8i32:
    603 ; SSE2:       # BB#0: # %entry
    604 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    605 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    606 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    607 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
    608 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
    609 ; SSE2-NEXT:    pand %xmm2, %xmm0
    610 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    611 ; SSE2-NEXT:    pand %xmm2, %xmm1
    612 ; SSE2-NEXT:    retq
    613 ;
    614 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
    615 ; SSSE3:       # BB#0: # %entry
    616 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    617 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    618 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    619 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero
    620 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero
    621 ; SSSE3-NEXT:    retq
    622 ;
    623 ; SSE41-LABEL: load_zext_8i8_to_8i32:
    624 ; SSE41:       # BB#0: # %entry
    625 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    626 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    627 ; SSE41-NEXT:    retq
    628 ;
    629 ; AVX1-LABEL: load_zext_8i8_to_8i32:
    630 ; AVX1:       # BB#0: # %entry
    631 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    632 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    633 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    634 ; AVX1-NEXT:    retq
    635 ;
    636 ; AVX2-LABEL: load_zext_8i8_to_8i32:
    637 ; AVX2:       # BB#0: # %entry
    638 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    639 ; AVX2-NEXT:    retq
    640 entry:
    641  %X = load <8 x i8>, <8 x i8>* %ptr
    642  %Y = zext <8 x i8> %X to <8 x i32>
    643  ret <8 x i32> %Y
    644 }
    645 
    646 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
        ; Test: load <16 x i8> from %ptr and zero-extend every lane to i16.
        ; The 256-bit result is split across xmm0/xmm1 on the SSE targets and
        ; returned in ymm0 on the AVX targets.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load the whole vector with movdqa (%rdi), then
        ;    interleave the low and high byte halves with a zeroed xmm2
        ;    (punpcklbw / punpckhbw).
        ;  - SSE4.1 issues two pmovzxbw loads; AVX1 issues two vpmovzxbw loads
        ;    and merges them with vinsertf128; AVX2 needs one vpmovzxbw into ymm0.
    647 ; SSE2-LABEL: load_zext_16i8_to_16i16:
    648 ; SSE2:       # BB#0: # %entry
    649 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    650 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    651 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    652 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    653 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
    654 ; SSE2-NEXT:    retq
    655 ;
    656 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
    657 ; SSSE3:       # BB#0: # %entry
    658 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    659 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    660 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    661 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    662 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
    663 ; SSSE3-NEXT:    retq
    664 ;
    665 ; SSE41-LABEL: load_zext_16i8_to_16i16:
    666 ; SSE41:       # BB#0: # %entry
    667 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    668 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    669 ; SSE41-NEXT:    retq
    670 ;
    671 ; AVX1-LABEL: load_zext_16i8_to_16i16:
    672 ; AVX1:       # BB#0: # %entry
    673 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    674 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    675 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    676 ; AVX1-NEXT:    retq
    677 ;
    678 ; AVX2-LABEL: load_zext_16i8_to_16i16:
    679 ; AVX2:       # BB#0: # %entry
    680 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    681 ; AVX2-NEXT:    retq
    682 entry:
    683  %X = load <16 x i8>, <16 x i8>* %ptr
    684  %Y = zext <16 x i8> %X to <16 x i16>
    685  ret <16 x i16> %Y
    686 }
    687 
    688 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
        ; Test: load <2 x i16> from %ptr and zero-extend both lanes to i64.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load 32 bits with movd, zero xmm1, and widen in two
        ;    steps by interleaving with zero (punpcklwd, then punpckldq).
        ;  - SSE4.1 and the AVX targets fold the load into one pmovzxwq /
        ;    vpmovzxwq with a memory operand.
    689 ; SSE2-LABEL: load_zext_2i16_to_2i64:
    690 ; SSE2:       # BB#0: # %entry
    691 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    692 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    693 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    694 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    695 ; SSE2-NEXT:    retq
    696 ;
    697 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
    698 ; SSSE3:       # BB#0: # %entry
    699 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    700 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    701 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    702 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    703 ; SSSE3-NEXT:    retq
    704 ;
    705 ; SSE41-LABEL: load_zext_2i16_to_2i64:
    706 ; SSE41:       # BB#0: # %entry
    707 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    708 ; SSE41-NEXT:    retq
    709 ;
    710 ; AVX-LABEL: load_zext_2i16_to_2i64:
    711 ; AVX:       # BB#0: # %entry
    712 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    713 ; AVX-NEXT:    retq
    714 entry:
    715  %X = load <2 x i16>, <2 x i16>* %ptr
    716  %Y = zext <2 x i16> %X to <2 x i64>
    717  ret <2 x i64> %Y
    718 }
    719 
    720 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
        ; Test: load <4 x i16> from %ptr and zero-extend every lane to i32.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load 64 bits with movq, zero xmm1, and interleave
        ;    the loaded words with zero words (punpcklwd).
        ;  - SSE4.1 and the AVX targets fold the load into one pmovzxwd /
        ;    vpmovzxwd with a memory operand.
    721 ; SSE2-LABEL: load_zext_4i16_to_4i32:
    722 ; SSE2:       # BB#0: # %entry
    723 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    724 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    725 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    726 ; SSE2-NEXT:    retq
    727 ;
    728 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
    729 ; SSSE3:       # BB#0: # %entry
    730 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    731 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    732 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    733 ; SSSE3-NEXT:    retq
    734 ;
    735 ; SSE41-LABEL: load_zext_4i16_to_4i32:
    736 ; SSE41:       # BB#0: # %entry
    737 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    738 ; SSE41-NEXT:    retq
    739 ;
    740 ; AVX-LABEL: load_zext_4i16_to_4i32:
    741 ; AVX:       # BB#0: # %entry
    742 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    743 ; AVX-NEXT:    retq
    744 entry:
    745  %X = load <4 x i16>, <4 x i16>* %ptr
    746  %Y = zext <4 x i16> %X to <4 x i32>
    747  ret <4 x i32> %Y
    748 }
    749 
    750 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
        ; Test: load <4 x i16> from %ptr and zero-extend every lane to i64.
        ; The 256-bit result is split across xmm0/xmm1 on the SSE targets and
        ; returned in ymm0 on the AVX targets.
        ; Expected lowering according to the checks below:
        ;  - SSE2 unpacks words, spreads them with pshufd, and clears everything
        ;    but the low word of each qword with a pand against
        ;    [65535,0,0,0,65535,0,0,0].
        ;  - SSSE3 uses two pshufb shuffles whose masks zero the upper bytes.
        ;  - SSE4.1 uses two pmovzxwq loads, AVX1 two vpmovzxwq loads joined with
        ;    vinsertf128, and AVX2 a single vpmovzxwq into ymm0.
        ; NOTE(review): on SSE2 and SSSE3 the punpcklwd interleaves with xmm0,
        ; which nothing in this function has written; the later pand / zeroing
        ; pshufb masks appear to discard those words.
    751 ; SSE2-LABEL: load_zext_4i16_to_4i64:
    752 ; SSE2:       # BB#0: # %entry
    753 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    754 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    755 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
    756 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0]
    757 ; SSE2-NEXT:    pand %xmm2, %xmm0
    758 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
    759 ; SSE2-NEXT:    pand %xmm2, %xmm1
    760 ; SSE2-NEXT:    retq
    761 ;
    762 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
    763 ; SSSE3:       # BB#0: # %entry
    764 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    765 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
    766 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    767 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
    768 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero
    769 ; SSSE3-NEXT:    retq
    770 ;
    771 ; SSE41-LABEL: load_zext_4i16_to_4i64:
    772 ; SSE41:       # BB#0: # %entry
    773 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    774 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    775 ; SSE41-NEXT:    retq
    776 ;
    777 ; AVX1-LABEL: load_zext_4i16_to_4i64:
    778 ; AVX1:       # BB#0: # %entry
    779 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    780 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    781 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    782 ; AVX1-NEXT:    retq
    783 ;
    784 ; AVX2-LABEL: load_zext_4i16_to_4i64:
    785 ; AVX2:       # BB#0: # %entry
    786 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    787 ; AVX2-NEXT:    retq
    788 entry:
    789  %X = load <4 x i16>, <4 x i16>* %ptr
    790  %Y = zext <4 x i16> %X to <4 x i64>
    791  ret <4 x i64> %Y
    792 }
    793 
    794 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
        ; Test: load <8 x i16> from %ptr and zero-extend every lane to i32.
        ; The 256-bit result is split across xmm0/xmm1 on the SSE targets and
        ; returned in ymm0 on the AVX targets.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load the whole vector with movdqa (%rdi), then
        ;    interleave the low and high word halves with a zeroed xmm2
        ;    (punpcklwd / punpckhwd).
        ;  - SSE4.1 issues two pmovzxwd loads; AVX1 issues two vpmovzxwd loads
        ;    and merges them with vinsertf128; AVX2 needs one vpmovzxwd into ymm0.
    795 ; SSE2-LABEL: load_zext_8i16_to_8i32:
    796 ; SSE2:       # BB#0: # %entry
    797 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    798 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    799 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    800 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    801 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    802 ; SSE2-NEXT:    retq
    803 ;
    804 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
    805 ; SSSE3:       # BB#0: # %entry
    806 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    807 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    808 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    809 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    810 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    811 ; SSSE3-NEXT:    retq
    812 ;
    813 ; SSE41-LABEL: load_zext_8i16_to_8i32:
    814 ; SSE41:       # BB#0: # %entry
    815 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    816 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    817 ; SSE41-NEXT:    retq
    818 ;
    819 ; AVX1-LABEL: load_zext_8i16_to_8i32:
    820 ; AVX1:       # BB#0: # %entry
    821 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    822 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    823 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    824 ; AVX1-NEXT:    retq
    825 ;
    826 ; AVX2-LABEL: load_zext_8i16_to_8i32:
    827 ; AVX2:       # BB#0: # %entry
    828 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    829 ; AVX2-NEXT:    retq
    830 entry:
    831  %X = load <8 x i16>, <8 x i16>* %ptr
    832  %Y = zext <8 x i16> %X to <8 x i32>
    833  ret <8 x i32> %Y
    834 }
    835 
    836 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
        ; Test: load <2 x i32> from %ptr and zero-extend both lanes to i64.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load 64 bits with movq, zero xmm1, and interleave
        ;    the loaded dwords with zero dwords (punpckldq).
        ;  - SSE4.1 and the AVX targets fold the load into one pmovzxdq /
        ;    vpmovzxdq with a memory operand.
    837 ; SSE2-LABEL: load_zext_2i32_to_2i64:
    838 ; SSE2:       # BB#0: # %entry
    839 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    840 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    841 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    842 ; SSE2-NEXT:    retq
    843 ;
    844 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
    845 ; SSSE3:       # BB#0: # %entry
    846 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    847 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    848 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    849 ; SSSE3-NEXT:    retq
    850 ;
    851 ; SSE41-LABEL: load_zext_2i32_to_2i64:
    852 ; SSE41:       # BB#0: # %entry
    853 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    854 ; SSE41-NEXT:    retq
    855 ;
    856 ; AVX-LABEL: load_zext_2i32_to_2i64:
    857 ; AVX:       # BB#0: # %entry
    858 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    859 ; AVX-NEXT:    retq
    860 entry:
    861  %X = load <2 x i32>, <2 x i32>* %ptr
    862  %Y = zext <2 x i32> %X to <2 x i64>
    863  ret <2 x i64> %Y
    864 }
    865 
    866 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
        ; Test: load <4 x i32> from %ptr and zero-extend every lane to i64.
        ; The 256-bit result is split across xmm0/xmm1 on the SSE targets and
        ; returned in ymm0 on the AVX targets.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 load the whole vector with movdqa (%rdi), then
        ;    interleave the low and high dword halves with a zeroed xmm2
        ;    (punpckldq / punpckhdq).
        ;  - SSE4.1 issues two pmovzxdq loads; AVX1 issues two vpmovzxdq loads
        ;    and merges them with vinsertf128; AVX2 needs one vpmovzxdq into ymm0.
    867 ; SSE2-LABEL: load_zext_4i32_to_4i64:
    868 ; SSE2:       # BB#0: # %entry
    869 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    870 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    871 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    872 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    873 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    874 ; SSE2-NEXT:    retq
    875 ;
    876 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
    877 ; SSSE3:       # BB#0: # %entry
    878 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    879 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    880 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    881 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    882 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    883 ; SSSE3-NEXT:    retq
    884 ;
    885 ; SSE41-LABEL: load_zext_4i32_to_4i64:
    886 ; SSE41:       # BB#0: # %entry
    887 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    888 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
    889 ; SSE41-NEXT:    retq
    890 ;
    891 ; AVX1-LABEL: load_zext_4i32_to_4i64:
    892 ; AVX1:       # BB#0: # %entry
    893 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    894 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
    895 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    896 ; AVX1-NEXT:    retq
    897 ;
    898 ; AVX2-LABEL: load_zext_4i32_to_4i64:
    899 ; AVX2:       # BB#0: # %entry
    900 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    901 ; AVX2-NEXT:    retq
    902 entry:
    903  %X = load <4 x i32>, <4 x i32>* %ptr
    904  %Y = zext <4 x i32> %X to <4 x i64>
    905  ret <4 x i64> %Y
    906 }
    907 
    908 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
        ; Test: zero-extend a <8 x i8> argument (passed in xmm0, no load) to
        ; <8 x i32>.
        ; Every expected sequence starts with a pand / vpand against a
        ; RIP-relative constant and then widens word lanes to dwords.
        ; NOTE(review): presumably the <8 x i8> argument arrives widened to
        ; 16-bit lanes and the mask clears the undefined upper byte of each
        ; lane - confirm against the calling-convention legalization.
        ; Expected lowering of the widening step according to the checks below:
        ;  - SSE2 and SSSE3 use punpcklwd / punpckhwd against a zeroed xmm2.
        ;  - SSE4.1 uses pmovzxwd for the low half and punpckhwd for the high half.
        ;  - AVX1 uses vpunpckhwd plus vpmovzxwd and merges with vinsertf128.
        ;  - AVX2 uses one vpmovzxwd from xmm0 into ymm0.
    909 ; SSE2-LABEL: zext_8i8_to_8i32:
    910 ; SSE2:       # BB#0: # %entry
    911 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    912 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
    913 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    914 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    915 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    916 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    917 ; SSE2-NEXT:    retq
    918 ;
    919 ; SSSE3-LABEL: zext_8i8_to_8i32:
    920 ; SSSE3:       # BB#0: # %entry
    921 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    922 ; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
    923 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    924 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    925 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    926 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    927 ; SSSE3-NEXT:    retq
    928 ;
    929 ; SSE41-LABEL: zext_8i8_to_8i32:
    930 ; SSE41:       # BB#0: # %entry
    931 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    932 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
    933 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    934 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    935 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    936 ; SSE41-NEXT:    retq
    937 ;
    938 ; AVX1-LABEL: zext_8i8_to_8i32:
    939 ; AVX1:       # BB#0: # %entry
    940 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    941 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    942 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    943 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    944 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    945 ; AVX1-NEXT:    retq
    946 ;
    947 ; AVX2-LABEL: zext_8i8_to_8i32:
    948 ; AVX2:       # BB#0: # %entry
    949 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    950 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    951 ; AVX2-NEXT:    retq
    952 entry:
    953   %t = zext <8 x i8> %z to <8 x i32>
    954   ret <8 x i32> %t
    955 }
    956 
    957 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Test: a zero extension of <8 x i16> to <8 x i32> written as a
        ; shufflevector with a zero vector followed by a bitcast, rather than as
        ; a zext instruction.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 use punpcklwd / punpckhwd against a zeroed xmm2.
        ;  - SSE4.1 uses pmovzxwd for the low half and punpckhwd for the high half.
        ;  - AVX1 uses vpunpckhwd plus vpmovzxwd and merges with vinsertf128.
        ;  - AVX2 uses one vpmovzxwd from xmm0 into ymm0.
    958 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
    959 ; SSE2:       # BB#0: # %entry
    960 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    961 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    962 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    963 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    964 ; SSE2-NEXT:    retq
    965 ;
    966 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
    967 ; SSSE3:       # BB#0: # %entry
    968 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    969 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    970 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    971 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    972 ; SSSE3-NEXT:    retq
    973 ;
    974 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
    975 ; SSE41:       # BB#0: # %entry
    976 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    977 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    978 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    979 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    980 ; SSE41-NEXT:    retq
    981 ;
    982 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
    983 ; AVX1:       # BB#0: # %entry
    984 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    985 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    986 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    987 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    988 ; AVX1-NEXT:    retq
    989 ;
    990 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
    991 ; AVX2:       # BB#0: # %entry
    992 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    993 ; AVX2-NEXT:    retq
    994 entry:
        ; Mask indices 0-7 pick the lanes of %A; index 8 picks lane 0 of the
        ; zero vector. Each source word is therefore followed by a zero word,
        ; and the bitcast reinterprets every such pair as one i32.
    995   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
    996   %Z = bitcast <16 x i16> %B to <8 x i32>
    997   ret <8 x i32> %Z
    998 }
    999 
   1000 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Test: a zero extension of <4 x i32> to <4 x i64> written as a
        ; shufflevector with a zero vector followed by a bitcast, rather than as
        ; a zext instruction.
        ; Expected lowering according to the checks below:
        ;  - SSE2 and SSSE3 use punpckldq / punpckhdq against a zeroed xmm2.
        ;  - SSE4.1 uses pmovzxdq for the low half and punpckhdq for the high half.
        ;  - AVX1 is checked against a floating-point-domain sequence
        ;    (vinsertps, vxorpd, vblendpd, vpermilps, vinsertf128) rather than
        ;    vpmovzxdq.
        ;  - AVX2 uses one vpmovzxdq from xmm0 into ymm0.
   1001 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
   1002 ; SSE2:       # BB#0: # %entry
   1003 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1004 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1005 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1006 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1007 ; SSE2-NEXT:    retq
   1008 ;
   1009 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
   1010 ; SSSE3:       # BB#0: # %entry
   1011 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1012 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1013 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1014 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1015 ; SSSE3-NEXT:    retq
   1016 ;
   1017 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
   1018 ; SSE41:       # BB#0: # %entry
   1019 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1020 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1021 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
   1022 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1023 ; SSE41-NEXT:    retq
   1024 ;
   1025 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
   1026 ; AVX1:       # BB#0: # %entry
   1027 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
   1028 ; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1029 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
   1030 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
   1031 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1032 ; AVX1-NEXT:    retq
   1033 ;
   1034 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
   1035 ; AVX2:       # BB#0: # %entry
   1036 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1037 ; AVX2-NEXT:    retq
   1038 entry:
        ; Mask indices 0-3 pick the lanes of %A; index 4 picks lane 0 of the
        ; zero vector. Each source dword is therefore followed by a zero dword,
        ; and the bitcast reinterprets every such pair as one i64.
   1039   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
   1040   %Z = bitcast <8 x i32> %B to <4 x i64>
   1041   ret <4 x i64> %Z
   1042 }
   1043 
   1044 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
        ; Test: a zero extension of a <8 x i8> argument to <8 x i32> written as
        ; a shufflevector with a zero vector followed by a bitcast.
        ; Expected lowering according to the checks below:
        ;  - SSE2 masks the input with pand, compacts it with packuswb, then
        ;    widens bytes to words and words to dwords by unpacking against a
        ;    zeroed xmm2.
        ;  - SSSE3 compacts the even-numbered bytes with pshufb [0,2,4,...,14],
        ;    then performs the same two unpack steps.
        ;  - SSE4.1 and AVX1 compact with pshufb / vpshufb and use two pmovzxbd /
        ;    vpmovzxbd (the second after a pshufd that brings bytes 4-7 down);
        ;    AVX1 merges the halves with vinsertf128.
        ;  - AVX2 compacts with vpshufb and uses one vpmovzxbd into ymm0.
        ; NOTE(review): the compaction step suggests the <8 x i8> argument is
        ; held in 16-bit lanes in xmm0 - confirm against type legalization.
   1045 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
   1046 ; SSE2:       # BB#0: # %entry
   1047 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1048 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
   1049 ; SSE2-NEXT:    packuswb %xmm1, %xmm1
   1050 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1051 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1052 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1053 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1054 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1055 ; SSE2-NEXT:    retq
   1056 ;
   1057 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
   1058 ; SSSE3:       # BB#0: # %entry
   1059 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1060 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1061 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1062 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1063 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1064 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1065 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1066 ; SSSE3-NEXT:    retq
   1067 ;
   1068 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
   1069 ; SSE41:       # BB#0: # %entry
   1070 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1071 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1072 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1073 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1074 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1075 ; SSE41-NEXT:    retq
   1076 ;
   1077 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
   1078 ; AVX1:       # BB#0: # %entry
   1079 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1080 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1081 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1082 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1083 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1084 ; AVX1-NEXT:    retq
   1085 ;
   1086 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
   1087 ; AVX2:       # BB#0: # %entry
   1088 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1089 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1090 ; AVX2-NEXT:    retq
   1091 entry:
        ; Mask indices 0-7 pick the lanes of %A; index 8 picks lane 0 of the
        ; zero vector. Each source byte is therefore followed by three zero
        ; bytes, and the bitcast reinterprets every such group as one i32.
   1092   %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
   1093   %Z = bitcast <32 x i8> %B to <8 x i32>
   1094   ret <8 x i32> %Z
   1095 }
   1096 
   1097 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Test: zero-extend bytes 6 and 7 of %A to two i64 lanes, i.e. a zext
        ; pattern whose source elements start at a non-zero offset. It is
        ; written as a shufflevector with a zero vector followed by a bitcast.
        ; Expected lowering according to the checks below:
        ;  - SSE2 narrows down to the wanted bytes with three unpacks against a
        ;    zeroed xmm1 (punpcklbw, punpckhwd, punpckhdq).
        ;  - SSSE3 does the whole job with one zeroing pshufb.
        ;  - SSE4.1 and the AVX targets shift each qword right by 48 bits
        ;    (psrlq / vpsrlq) so byte 6 lands in byte 0, then use pmovzxbq /
        ;    vpmovzxbq.
   1098 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1099 ; SSE2:       # BB#0: # %entry
   1100 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1101 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1102 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1103 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1104 ; SSE2-NEXT:    retq
   1105 ;
   1106 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1107 ; SSSE3:       # BB#0: # %entry
   1108 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   1109 ; SSSE3-NEXT:    retq
   1110 ;
   1111 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1112 ; SSE41:       # BB#0: # %entry
   1113 ; SSE41-NEXT:    psrlq $48, %xmm0
   1114 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1115 ; SSE41-NEXT:    retq
   1116 ;
   1117 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1118 ; AVX:       # BB#0: # %entry
   1119 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
   1120 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1121 ; AVX-NEXT:    retq
   1122 entry:
        ; Mask indices 6 and 7 pick bytes of %A; index 16 picks lane 0 of the
        ; zero vector. Each selected byte is followed by seven zero bytes, and
        ; the bitcast reinterprets each 8-byte group as one i64.
   1123   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   1124   %Z = bitcast <16 x i8> %B to <2 x i64>
   1125   ret <2 x i64> %Z
   1126 }
   1127 
   1128 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle starting at byte 11: bytes 11, 12, 13 and 14 of %A
        ; each become the first byte of an 8-byte group whose other seven bytes come
        ; from a zero vector; the 32-byte result is then viewed as four i64 lanes.
        ; The per-target instruction listings below are autogenerated expected llc
        ; output (see the NOTE at the top of the file); regenerate them with
        ; utils/update_llc_test_checks.py instead of editing them by hand.
   1129 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1130 ; SSE2:       # BB#0: # %entry
   1131 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1132 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
   1133 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1134 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
   1135 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1136 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1137 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1138 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1139 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1140 ; SSE2-NEXT:    retq
   1141 ;
   1142 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1143 ; SSSE3:       # BB#0: # %entry
   1144 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1145 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
   1146 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
   1147 ; SSSE3-NEXT:    retq
   1148 ;
   1149 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1150 ; SSE41:       # BB#0: # %entry
   1151 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1152 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1153 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1154 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1155 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1156 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1157 ; SSE41-NEXT:    retq
   1158 ;
   1159 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1160 ; AVX1:       # BB#0: # %entry
   1161 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1162 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1163 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1164 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1165 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1166 ; AVX1-NEXT:    retq
   1167 ;
   1168 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1169 ; AVX2:       # BB#0: # %entry
   1170 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1171 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1172 ; AVX2-NEXT:    retq
   1173 entry:
        ; Mask indices 0-15 select bytes of %A; index 16 is element 0 of the
        ; zeroinitializer operand, i.e. a zero byte used as padding.
   1174   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
        ; Reinterpret the 32 shuffled bytes as four 64-bit lanes.
   1175   %Z = bitcast <32 x i8> %B to <4 x i64>
   1176   ret <4 x i64> %Z
   1177 }
   1178 
   1179 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle: i16 elements 3 and 4 of %A each become the first
        ; 16 bits of a 64-bit group padded with zero elements; the result is viewed
        ; as two i64 lanes.
        ; NOTE(review): the mask starts at element 3, which is byte offset 6; the
        ; "offset6" in the name presumably counts bytes, unlike the sibling tests
        ; whose suffix matches the first element index - confirm before renaming.
        ; The per-target listings below are autogenerated expected llc output.
   1180 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1181 ; SSE2:       # BB#0: # %entry
   1182 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1183 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1184 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1185 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1186 ; SSE2-NEXT:    retq
   1187 ;
   1188 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1189 ; SSSE3:       # BB#0: # %entry
   1190 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1191 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1192 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1193 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1194 ; SSSE3-NEXT:    retq
   1195 ;
   1196 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1197 ; SSE41:       # BB#0: # %entry
   1198 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1199 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1200 ; SSE41-NEXT:    retq
   1201 ;
   1202 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1203 ; AVX:       # BB#0: # %entry
   1204 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1205 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1206 ; AVX-NEXT:    retq
   1207 entry:
        ; Mask indices 0-7 select elements of %A; index 8 is element 0 of the
        ; zeroinitializer operand, i.e. a zero i16 used as padding.
   1208   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
        ; Reinterpret the eight shuffled i16 values as two 64-bit lanes.
   1209   %Z = bitcast <8 x i16> %B to <2 x i64>
   1210   ret <2 x i64> %Z
   1211 }
   1212 
   1213 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle starting at element 2: i16 elements 2, 3, 4 and 5
        ; of %A each become the first 16 bits of a 64-bit group padded with zero
        ; elements; the 256-bit result is viewed as four i64 lanes.
        ; The per-target listings below are autogenerated expected llc output.
   1214 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1215 ; SSE2:       # BB#0: # %entry
   1216 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1217 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1218 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1219 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1220 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1221 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1222 ; SSE2-NEXT:    retq
   1223 ;
   1224 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1225 ; SSSE3:       # BB#0: # %entry
   1226 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1227 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1228 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1229 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1230 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1231 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1232 ; SSSE3-NEXT:    retq
   1233 ;
   1234 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1235 ; SSE41:       # BB#0: # %entry
   1236 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1237 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1238 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1239 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1240 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1241 ; SSE41-NEXT:    retq
   1242 ;
   1243 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1244 ; AVX1:       # BB#0: # %entry
   1245 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1246 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1247 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1248 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1249 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1250 ; AVX1-NEXT:    retq
   1251 ;
   1252 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1253 ; AVX2:       # BB#0: # %entry
   1254 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,2,3,5,6,6,7]
   1255 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1256 ; AVX2-NEXT:    retq
   1257 entry:
        ; Mask indices 0-7 select elements of %A; index 8 is element 0 of the
        ; zeroinitializer operand, i.e. a zero i16 used as padding.
   1258   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
        ; Reinterpret the sixteen shuffled i16 values as four 64-bit lanes.
   1259   %Z = bitcast <16 x i16> %B to <4 x i64>
   1260   ret <4 x i64> %Z
   1261 }
   1262 
   1263 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle starting at element 1: i16 elements 1, 2, 3 and 4
        ; of %A are interleaved with zero elements and the result is viewed as four
        ; i32 lanes. All SSE-level RUN lines share one expected sequence here, as do
        ; both AVX-level ones. The listings below are autogenerated llc output.
   1264 ; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1265 ; SSE:       # BB#0: # %entry
   1266 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1267 ; SSE-NEXT:    pxor %xmm1, %xmm1
   1268 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1269 ; SSE-NEXT:    retq
   1270 ;
   1271 ; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1272 ; AVX:       # BB#0: # %entry
   1273 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1274 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1275 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1276 ; AVX-NEXT:    retq
   1277 entry:
        ; Mask indices 0-7 select elements of %A; index 8 is element 0 of the
        ; zeroinitializer operand, i.e. a zero i16 used as padding.
   1278   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
        ; Reinterpret the eight shuffled i16 values as four 32-bit lanes.
   1279   %Z = bitcast <8 x i16> %B to <4 x i32>
   1280   ret <4 x i32> %Z
   1281 }
   1282 
   1283 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle starting at element 3: i16 elements 3..7 of %A are
        ; interleaved with zero elements to fill i32 lanes 0-4. The mask entries
        ; that would feed the first half of lanes 5-7 are undef, so only the zero
        ; padding of those lanes is specified by the IR.
        ; The per-target listings below are autogenerated expected llc output.
   1284 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1285 ; SSE2:       # BB#0: # %entry
   1286 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1287 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1288 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1289 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1290 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1291 ; SSE2-NEXT:    retq
   1292 ;
   1293 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1294 ; SSSE3:       # BB#0: # %entry
   1295 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1296 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1297 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1298 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1299 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1300 ; SSSE3-NEXT:    retq
   1301 ;
   1302 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1303 ; SSE41:       # BB#0: # %entry
   1304 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1305 ; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1306 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1307 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1308 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1309 ; SSE41-NEXT:    retq
   1310 ;
   1311 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1312 ; AVX1:       # BB#0: # %entry
   1313 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1314 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1315 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1316 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1317 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1318 ; AVX1-NEXT:    retq
   1319 ;
   1320 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1321 ; AVX2:       # BB#0: # %entry
   1322 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
   1323 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1324 ; AVX2-NEXT:    retq
   1325 entry:
        ; Mask indices 0-7 select elements of %A; index 8 is element 0 of the
        ; zeroinitializer operand (a zero i16); undef entries leave that result
        ; element unconstrained.
   1326   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
        ; Reinterpret the sixteen shuffled i16 values as eight 32-bit lanes.
   1327   %Z = bitcast <16 x i16> %B to <8 x i32>
   1328   ret <8 x i32> %Z
   1329 }
   1330 
   1331 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle of the upper half of a 256-bit input: i16 elements
        ; 8..12 and 14 of %A are interleaved with zero elements to fill i32 lanes
        ; 0-4 and 6. The mask entries feeding the first half of lanes 5 and 7 are
        ; undef, so only the zero padding of those lanes is specified by the IR.
        ; The per-target listings below are autogenerated expected llc output.
   1332 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1333 ; SSE2:       # BB#0: # %entry
   1334 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1335 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1336 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1337 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1338 ; SSE2-NEXT:    retq
   1339 ;
   1340 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1341 ; SSSE3:       # BB#0: # %entry
   1342 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1343 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1344 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1345 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1346 ; SSSE3-NEXT:    retq
   1347 ;
   1348 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1349 ; SSE41:       # BB#0: # %entry
   1350 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1351 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1352 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
   1353 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
   1354 ; SSE41-NEXT:    movdqa %xmm2, %xmm1
   1355 ; SSE41-NEXT:    retq
   1356 ;
   1357 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1358 ; AVX1:       # BB#0: # %entry
   1359 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1360 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
   1361 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1362 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   1363 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1364 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1365 ; AVX1-NEXT:    retq
   1366 ;
   1367 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1368 ; AVX2:       # BB#0: # %entry
   1369 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1370 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1371 ; AVX2-NEXT:    retq
   1372 entry:
        ; Mask indices 0-15 select elements of %A; index 16 is element 0 of the
        ; zeroinitializer operand (a zero i16); undef entries leave that result
        ; element unconstrained.
   1373   %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
        ; Reinterpret the sixteen shuffled i16 values as eight 32-bit lanes.
   1374   %Z = bitcast <16 x i16> %B to <8 x i32>
   1375   ret <8 x i32> %Z
   1376 }
   1377 
   1378 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle starting at element 2: i32 elements 2 and 3 of %A
        ; are interleaved with zero elements and the result is viewed as two i64
        ; lanes. All SSE-level RUN lines share one expected sequence here, as do
        ; both AVX-level ones. The listings below are autogenerated llc output.
   1379 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
   1380 ; SSE:       # BB#0: # %entry
   1381 ; SSE-NEXT:    pxor %xmm1, %xmm1
   1382 ; SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1383 ; SSE-NEXT:    retq
   1384 ;
   1385 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
   1386 ; AVX:       # BB#0: # %entry
   1387 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1388 ; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1389 ; AVX-NEXT:    retq
   1390 entry:
        ; Mask indices 0-3 select elements of %A; index 4 is element 0 of the
        ; zeroinitializer operand, i.e. a zero i32 used as padding.
   1391   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
        ; Reinterpret the four shuffled i32 values as two 64-bit lanes.
   1392   %Z = bitcast <4 x i32> %B to <2 x i64>
   1393   ret <2 x i64> %Z
   1394 }
   1395 
   1396 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extending shuffle with a partly undef mask: i32 elements 2 and 3 of
        ; %A fill the first half of i64 lanes 1 and 2, each padded with a zero
        ; element. The mask entries feeding the first half of lanes 0 and 3 are
        ; undef, so only the zero padding of those lanes is specified by the IR.
        ; The per-target listings below are autogenerated expected llc output.
   1397 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1398 ; SSE2:       # BB#0: # %entry
   1399 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1400 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   1401 ; SSE2-NEXT:    pand %xmm1, %xmm0
   1402 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1403 ; SSE2-NEXT:    retq
   1404 ;
   1405 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1406 ; SSSE3:       # BB#0: # %entry
   1407 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1408 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   1409 ; SSSE3-NEXT:    pand %xmm1, %xmm0
   1410 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1411 ; SSSE3-NEXT:    retq
   1412 ;
   1413 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1414 ; SSE41:       # BB#0: # %entry
   1415 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1416 ; SSE41-NEXT:    pxor %xmm0, %xmm0
   1417 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   1418 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1419 ; SSE41-NEXT:    retq
   1420 ;
   1421 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1422 ; AVX1:       # BB#0: # %entry
   1423 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
   1424 ; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1425 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
   1426 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1427 ; AVX1-NEXT:    retq
   1428 ;
   1429 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1430 ; AVX2:       # BB#0: # %entry
   1431 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,3,5,6,7,7]
   1432 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1433 ; AVX2-NEXT:    retq
   1434 entry:
        ; Mask indices 0-3 select elements of %A; index 4 is element 0 of the
        ; zeroinitializer operand (a zero i32); undef entries leave that result
        ; element unconstrained.
   1435   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
        ; Reinterpret the eight shuffled i32 values as four 64-bit lanes.
   1436   %Z = bitcast <8 x i32> %B to <4 x i64>
   1437   ret <4 x i64> %Z
   1438 }
   1439