Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
      8 
      9 define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low eight i8 lanes of %A to <8 x i16>.
        ; Per the checks below, the SSE2 and SSSE3 runs zero a register and
        ; interleave with punpcklbw, while the SSE4.1 run and the runs sharing the
        ; AVX prefix use a single pmovzxbw / vpmovzxbw.
     10 ; SSE2-LABEL: zext_16i8_to_8i16:
     11 ; SSE2:       # BB#0: # %entry
     12 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     13 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     14 ; SSE2-NEXT:    retq
     15 ;
     16 ; SSSE3-LABEL: zext_16i8_to_8i16:
     17 ; SSSE3:       # BB#0: # %entry
     18 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     19 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     20 ; SSSE3-NEXT:    retq
     21 ;
     22 ; SSE41-LABEL: zext_16i8_to_8i16:
     23 ; SSE41:       # BB#0: # %entry
     24 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     25 ; SSE41-NEXT:    retq
     26 ;
     27 ; AVX-LABEL: zext_16i8_to_8i16:
     28 ; AVX:       # BB#0: # %entry
     29 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     30 ; AVX-NEXT:    retq
     31 entry:
        ; Keep only lanes 0-7 of the argument, then widen each lane to 16 bits.
     32   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     33   %C = zext <8 x i8> %B to <8 x i16>
     34   ret <8 x i16> %C
     35 }
     36 
     37 ; PR17654
     38 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
        ; Zero-extends all sixteen i8 lanes of %A to <16 x i16> (a 256-bit result).
        ; Per the checks below, the SSE2, SSSE3 and SSE4.1 runs leave the low half
        ; in xmm0 and the high half (punpckhbw against zero) in xmm1; AVX1 widens
        ; each half separately and joins them with vinsertf128; AVX2 and AVX512
        ; use one ymm vpmovzxbw.
     39 ; SSE2-LABEL: zext_16i8_to_16i16:
     40 ; SSE2:       # BB#0: # %entry
     41 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
     42 ; SSE2-NEXT:    pxor %xmm2, %xmm2
     43 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
     44 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     45 ; SSE2-NEXT:    retq
     46 ;
     47 ; SSSE3-LABEL: zext_16i8_to_16i16:
     48 ; SSSE3:       # BB#0: # %entry
     49 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
     50 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
     51 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
     52 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     53 ; SSSE3-NEXT:    retq
     54 ;
     55 ; SSE41-LABEL: zext_16i8_to_16i16:
     56 ; SSE41:       # BB#0: # %entry
     57 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
     58 ; SSE41-NEXT:    pxor %xmm2, %xmm2
     59 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
     60 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
     61 ; SSE41-NEXT:    retq
     62 ;
     63 ; AVX1-LABEL: zext_16i8_to_16i16:
     64 ; AVX1:       # BB#0: # %entry
     65 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     66 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
     67 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     68 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     69 ; AVX1-NEXT:    retq
     70 ;
     71 ; AVX2-LABEL: zext_16i8_to_16i16:
     72 ; AVX2:       # BB#0: # %entry
     73 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     74 ; AVX2-NEXT:    retq
     75 ;
     76 ; AVX512-LABEL: zext_16i8_to_16i16:
     77 ; AVX512:       # BB#0: # %entry
     78 ; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     79 ; AVX512-NEXT:    retq
     80 entry:
        ; Full-width extend; no shufflevector is needed because every lane is used.
     81   %B = zext <16 x i8> %A to <16 x i16>
     82   ret <16 x i16> %B
     83 }
     84 
     85 define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low four i8 lanes of %A to <4 x i32>.
        ; Per the checks below, the SSE2 and SSSE3 runs need two unpack steps
        ; against a zeroed register (punpcklbw, then punpcklwd), while the SSE4.1
        ; run and the runs sharing the AVX prefix use a single pmovzxbd / vpmovzxbd.
     86 ; SSE2-LABEL: zext_16i8_to_4i32:
     87 ; SSE2:       # BB#0: # %entry
     88 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     89 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     90 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     91 ; SSE2-NEXT:    retq
     92 ;
     93 ; SSSE3-LABEL: zext_16i8_to_4i32:
     94 ; SSSE3:       # BB#0: # %entry
     95 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
     96 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     97 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     98 ; SSSE3-NEXT:    retq
     99 ;
    100 ; SSE41-LABEL: zext_16i8_to_4i32:
    101 ; SSE41:       # BB#0: # %entry
    102 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    103 ; SSE41-NEXT:    retq
    104 ;
    105 ; AVX-LABEL: zext_16i8_to_4i32:
    106 ; AVX:       # BB#0: # %entry
    107 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    108 ; AVX-NEXT:    retq
    109 entry:
        ; Keep only lanes 0-3 of the argument, then widen each lane to 32 bits.
    110   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    111   %C = zext <4 x i8> %B to <4 x i32>
    112   ret <4 x i32> %C
    113 }
    114 
    115 define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low eight i8 lanes of %A to <8 x i32> (a 256-bit result).
        ; Per the checks below, the SSE2 and SSSE3 runs widen bytes to words once
        ; and then unpack the low and high word halves into xmm0 and xmm1; the
        ; SSE4.1 and AVX1 runs issue two byte-to-dword extends with a pshufd in
        ; between; AVX2 and AVX512 use one ymm vpmovzxbd.
    116 ; SSE2-LABEL: zext_16i8_to_8i32:
    117 ; SSE2:       # BB#0: # %entry
    118 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    119 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    120 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    121 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    122 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    123 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    124 ; SSE2-NEXT:    retq
    125 ;
    126 ; SSSE3-LABEL: zext_16i8_to_8i32:
    127 ; SSSE3:       # BB#0: # %entry
    128 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    129 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    130 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    131 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    132 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    133 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    134 ; SSSE3-NEXT:    retq
    135 ;
    136 ; SSE41-LABEL: zext_16i8_to_8i32:
    137 ; SSE41:       # BB#0: # %entry
    138 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    139 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    140 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    141 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    142 ; SSE41-NEXT:    retq
    143 ;
    144 ; AVX1-LABEL: zext_16i8_to_8i32:
    145 ; AVX1:       # BB#0: # %entry
    146 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    147 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    148 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    149 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    150 ; AVX1-NEXT:    retq
    151 ;
    152 ; AVX2-LABEL: zext_16i8_to_8i32:
    153 ; AVX2:       # BB#0: # %entry
    154 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    155 ; AVX2-NEXT:    retq
    156 ;
    157 ; AVX512-LABEL: zext_16i8_to_8i32:
    158 ; AVX512:       # BB#0: # %entry
    159 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    160 ; AVX512-NEXT:    retq
    161 entry:
        ; Keep only lanes 0-7 of the argument, then widen each lane to 32 bits.
    162   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    163   %C = zext <8 x i8> %B to <8 x i32>
    164   ret <8 x i32> %C
    165 }
    166 
    167 define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low two i8 lanes of %A to <2 x i64>.
        ; Per the checks below, the SSE2 run chains three unpacks against a zeroed
        ; register, the SSSE3 run uses a single pshufb, and the SSE4.1 run and the
        ; runs sharing the AVX prefix use pmovzxbq / vpmovzxbq.
    168 ; SSE2-LABEL: zext_16i8_to_2i64:
    169 ; SSE2:       # BB#0: # %entry
    170 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    171 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    172 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    173 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    174 ; SSE2-NEXT:    retq
    175 ;
    176 ; SSSE3-LABEL: zext_16i8_to_2i64:
    177 ; SSSE3:       # BB#0: # %entry
    178 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    179 ; SSSE3-NEXT:    retq
    180 ;
    181 ; SSE41-LABEL: zext_16i8_to_2i64:
    182 ; SSE41:       # BB#0: # %entry
    183 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    184 ; SSE41-NEXT:    retq
    185 ;
    186 ; AVX-LABEL: zext_16i8_to_2i64:
    187 ; AVX:       # BB#0: # %entry
    188 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    189 ; AVX-NEXT:    retq
    190 entry:
        ; Keep only lanes 0-1 of the argument, then widen each lane to 64 bits.
    191   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
    192   %C = zext <2 x i8> %B to <2 x i64>
    193   ret <2 x i64> %C
    194 }
    195 
    196 define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low four i8 lanes of %A to <4 x i64> (a 256-bit result).
        ; Per the checks below, the SSE2 run unpacks against zero and splits the
        ; last step into punpckldq / punpckhdq; the SSSE3 run uses one pshufb per
        ; output half; the SSE4.1 and AVX1 runs extend once, shift with psrld $16,
        ; and extend again for the upper half; AVX2 and AVX512 use one ymm
        ; vpmovzxbq.
    197 ; SSE2-LABEL: zext_16i8_to_4i64:
    198 ; SSE2:       # BB#0: # %entry
    199 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    200 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    201 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    202 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    203 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    204 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    205 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    206 ; SSE2-NEXT:    retq
    207 ;
    208 ; SSSE3-LABEL: zext_16i8_to_4i64:
    209 ; SSSE3:       # BB#0: # %entry
    210 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    211 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    212 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
    213 ; SSSE3-NEXT:    retq
    214 ;
    215 ; SSE41-LABEL: zext_16i8_to_4i64:
    216 ; SSE41:       # BB#0: # %entry
    217 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    218 ; SSE41-NEXT:    psrld $16, %xmm0
    219 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    220 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    221 ; SSE41-NEXT:    retq
    222 ;
    223 ; AVX1-LABEL: zext_16i8_to_4i64:
    224 ; AVX1:       # BB#0: # %entry
    225 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    226 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
    227 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    228 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    229 ; AVX1-NEXT:    retq
    230 ;
    231 ; AVX2-LABEL: zext_16i8_to_4i64:
    232 ; AVX2:       # BB#0: # %entry
    233 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
    234 ; AVX2-NEXT:    retq
    235 ;
    236 ; AVX512-LABEL: zext_16i8_to_4i64:
    237 ; AVX512:       # BB#0: # %entry
    238 ; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
    239 ; AVX512-NEXT:    retq
    240 entry:
        ; Keep only lanes 0-3 of the argument, then widen each lane to 64 bits.
    241   %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    242   %C = zext <4 x i8> %B to <4 x i64>
    243   ret <4 x i64> %C
    244 }
    245 
    246 define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low four i16 lanes of %A to <4 x i32>.
        ; Per the checks below, the SSE2 and SSSE3 runs zero a register and
        ; interleave with punpcklwd, while the SSE4.1 run and the runs sharing the
        ; AVX prefix use a single pmovzxwd / vpmovzxwd.
    247 ; SSE2-LABEL: zext_8i16_to_4i32:
    248 ; SSE2:       # BB#0: # %entry
    249 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    250 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    251 ; SSE2-NEXT:    retq
    252 ;
    253 ; SSSE3-LABEL: zext_8i16_to_4i32:
    254 ; SSSE3:       # BB#0: # %entry
    255 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    256 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    257 ; SSSE3-NEXT:    retq
    258 ;
    259 ; SSE41-LABEL: zext_8i16_to_4i32:
    260 ; SSE41:       # BB#0: # %entry
    261 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    262 ; SSE41-NEXT:    retq
    263 ;
    264 ; AVX-LABEL: zext_8i16_to_4i32:
    265 ; AVX:       # BB#0: # %entry
    266 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    267 ; AVX-NEXT:    retq
    268 entry:
        ; Keep only lanes 0-3 of the argument, then widen each lane to 32 bits.
    269   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    270   %C = zext <4 x i16> %B to <4 x i32>
    271   ret <4 x i32> %C
    272 }
    273 
    274 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends all eight i16 lanes of %A to <8 x i32> (a 256-bit result).
        ; Per the checks below, the SSE2 and SSSE3 runs unpack the low and high
        ; word halves against zero into xmm0 and xmm1; the SSE4.1 run uses pmovzxwd
        ; for the low half and punpckhwd for the high half; AVX1 does the same
        ; with VEX forms and joins the halves with vinsertf128; AVX2 and AVX512
        ; use one ymm vpmovzxwd.
    275 ; SSE2-LABEL: zext_8i16_to_8i32:
    276 ; SSE2:       # BB#0: # %entry
    277 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    278 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    279 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    280 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    281 ; SSE2-NEXT:    retq
    282 ;
    283 ; SSSE3-LABEL: zext_8i16_to_8i32:
    284 ; SSSE3:       # BB#0: # %entry
    285 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    286 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    287 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    288 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    289 ; SSSE3-NEXT:    retq
    290 ;
    291 ; SSE41-LABEL: zext_8i16_to_8i32:
    292 ; SSE41:       # BB#0: # %entry
    293 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    294 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    295 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    296 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    297 ; SSE41-NEXT:    retq
    298 ;
    299 ; AVX1-LABEL: zext_8i16_to_8i32:
    300 ; AVX1:       # BB#0: # %entry
    301 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    302 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    303 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    304 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    305 ; AVX1-NEXT:    retq
    306 ;
    307 ; AVX2-LABEL: zext_8i16_to_8i32:
    308 ; AVX2:       # BB#0: # %entry
    309 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    310 ; AVX2-NEXT:    retq
    311 ;
    312 ; AVX512-LABEL: zext_8i16_to_8i32:
    313 ; AVX512:       # BB#0: # %entry
    314 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    315 ; AVX512-NEXT:    retq
    316 entry:
        ; Full-width extend; no shufflevector is needed because every lane is used.
    317   %B = zext <8 x i16> %A to <8 x i32>
    318   ret <8 x i32>%B
    319 }
    320 
    321 define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low two i16 lanes of %A to <2 x i64>.
        ; Per the checks below, the SSE2 and SSSE3 runs need two unpack steps
        ; against a zeroed register (punpcklwd, then punpckldq), while the SSE4.1
        ; run and the runs sharing the AVX prefix use a single pmovzxwq / vpmovzxwq.
    322 ; SSE2-LABEL: zext_8i16_to_2i64:
    323 ; SSE2:       # BB#0: # %entry
    324 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    325 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    326 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    327 ; SSE2-NEXT:    retq
    328 ;
    329 ; SSSE3-LABEL: zext_8i16_to_2i64:
    330 ; SSSE3:       # BB#0: # %entry
    331 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    332 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    333 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    334 ; SSSE3-NEXT:    retq
    335 ;
    336 ; SSE41-LABEL: zext_8i16_to_2i64:
    337 ; SSE41:       # BB#0: # %entry
    338 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    339 ; SSE41-NEXT:    retq
    340 ;
    341 ; AVX-LABEL: zext_8i16_to_2i64:
    342 ; AVX:       # BB#0: # %entry
    343 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    344 ; AVX-NEXT:    retq
    345 entry:
        ; Keep only lanes 0-1 of the argument, then widen each lane to 64 bits.
    346   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
    347   %C = zext <2 x i16> %B to <2 x i64>
    348   ret <2 x i64> %C
    349 }
    350 
    351 define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low four i16 lanes of %A to <4 x i64> (a 256-bit result).
        ; Per the checks below, the SSE2 and SSSE3 runs widen words to dwords once
        ; and then split into punpckldq / punpckhdq for xmm0 and xmm1; the SSE4.1
        ; and AVX1 runs issue two word-to-qword extends with a pshufd in between;
        ; AVX2 and AVX512 use one ymm vpmovzxwq.
    352 ; SSE2-LABEL: zext_8i16_to_4i64:
    353 ; SSE2:       # BB#0: # %entry
    354 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    355 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    356 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    357 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    358 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    359 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    360 ; SSE2-NEXT:    retq
    361 ;
    362 ; SSSE3-LABEL: zext_8i16_to_4i64:
    363 ; SSSE3:       # BB#0: # %entry
    364 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    365 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    366 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    367 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    368 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    369 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    370 ; SSSE3-NEXT:    retq
    371 ;
    372 ; SSE41-LABEL: zext_8i16_to_4i64:
    373 ; SSE41:       # BB#0: # %entry
    374 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    375 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    376 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    377 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    378 ; SSE41-NEXT:    retq
    379 ;
    380 ; AVX1-LABEL: zext_8i16_to_4i64:
    381 ; AVX1:       # BB#0: # %entry
    382 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    383 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    384 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    385 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    386 ; AVX1-NEXT:    retq
    387 ;
    388 ; AVX2-LABEL: zext_8i16_to_4i64:
    389 ; AVX2:       # BB#0: # %entry
    390 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    391 ; AVX2-NEXT:    retq
    392 ;
    393 ; AVX512-LABEL: zext_8i16_to_4i64:
    394 ; AVX512:       # BB#0: # %entry
    395 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    396 ; AVX512-NEXT:    retq
    397 entry:
        ; Keep only lanes 0-3 of the argument, then widen each lane to 64 bits.
    398   %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    399   %C = zext <4 x i16> %B to <4 x i64>
    400   ret <4 x i64> %C
    401 }
    402 
    403 define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extends the low two i32 lanes of %A to <2 x i64>.
        ; Per the checks below, the SSE2 and SSSE3 runs zero a register and
        ; interleave with punpckldq, while the SSE4.1 run and the runs sharing the
        ; AVX prefix use a single pmovzxdq / vpmovzxdq.
    404 ; SSE2-LABEL: zext_4i32_to_2i64:
    405 ; SSE2:       # BB#0: # %entry
    406 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    407 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    408 ; SSE2-NEXT:    retq
    409 ;
    410 ; SSSE3-LABEL: zext_4i32_to_2i64:
    411 ; SSSE3:       # BB#0: # %entry
    412 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    413 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    414 ; SSSE3-NEXT:    retq
    415 ;
    416 ; SSE41-LABEL: zext_4i32_to_2i64:
    417 ; SSE41:       # BB#0: # %entry
    418 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    419 ; SSE41-NEXT:    retq
    420 ;
    421 ; AVX-LABEL: zext_4i32_to_2i64:
    422 ; AVX:       # BB#0: # %entry
    423 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    424 ; AVX-NEXT:    retq
    425 entry:
        ; Keep only lanes 0-1 of the argument, then widen each lane to 64 bits.
    426   %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    427   %C = zext <2 x i32> %B to <2 x i64>
    428   ret <2 x i64> %C
    429 }
    430 
    431 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extends all four i32 lanes of %A to <4 x i64> (a 256-bit result).
        ; Per the checks below, the SSE2 and SSSE3 runs unpack the low and high
        ; dword halves against zero into xmm0 and xmm1; the SSE4.1 run uses pmovzxdq
        ; for the low half and punpckhdq for the high half; AVX1 does the same
        ; with VEX forms and joins the halves with vinsertf128; AVX2 and AVX512
        ; use one ymm vpmovzxdq.
    432 ; SSE2-LABEL: zext_4i32_to_4i64:
    433 ; SSE2:       # BB#0: # %entry
    434 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    435 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    436 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    437 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    438 ; SSE2-NEXT:    retq
    439 ;
    440 ; SSSE3-LABEL: zext_4i32_to_4i64:
    441 ; SSSE3:       # BB#0: # %entry
    442 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    443 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    444 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    445 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    446 ; SSSE3-NEXT:    retq
    447 ;
    448 ; SSE41-LABEL: zext_4i32_to_4i64:
    449 ; SSE41:       # BB#0: # %entry
    450 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    451 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    452 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
    453 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    454 ; SSE41-NEXT:    retq
    455 ;
    456 ; AVX1-LABEL: zext_4i32_to_4i64:
    457 ; AVX1:       # BB#0: # %entry
    458 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    459 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    460 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
    461 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    462 ; AVX1-NEXT:    retq
    463 ;
    464 ; AVX2-LABEL: zext_4i32_to_4i64:
    465 ; AVX2:       # BB#0: # %entry
    466 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    467 ; AVX2-NEXT:    retq
    468 ;
    469 ; AVX512-LABEL: zext_4i32_to_4i64:
    470 ; AVX512:       # BB#0: # %entry
    471 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    472 ; AVX512-NEXT:    retq
    473 entry:
        ; Full-width extend; no shufflevector is needed because every lane is used.
    474   %B = zext <4 x i32> %A to <4 x i64>
    475   ret <4 x i64>%B
    476 }
    477 
    478 define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
        ; Loads a <2 x i8> from %ptr and zero-extends it to <2 x i64>.
        ; Per the checks below, the SSE2 and SSSE3 runs load 16 bits with movzwl,
        ; move them into xmm0 with movd, and then widen (a three-step unpack chain
        ; on SSE2, one pshufb on SSSE3); the SSE4.1 run and the runs sharing the
        ; AVX prefix fold the load into pmovzxbq / vpmovzxbq (mem operand).
    479 ; SSE2-LABEL: load_zext_2i8_to_2i64:
    480 ; SSE2:       # BB#0: # %entry
    481 ; SSE2-NEXT:    movzwl (%rdi), %eax
    482 ; SSE2-NEXT:    movd %eax, %xmm0
    483 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    484 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    485 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    486 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    487 ; SSE2-NEXT:    retq
    488 ;
    489 ; SSSE3-LABEL: load_zext_2i8_to_2i64:
    490 ; SSSE3:       # BB#0: # %entry
    491 ; SSSE3-NEXT:    movzwl (%rdi), %eax
    492 ; SSSE3-NEXT:    movd %eax, %xmm0
    493 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    494 ; SSSE3-NEXT:    retq
    495 ;
    496 ; SSE41-LABEL: load_zext_2i8_to_2i64:
    497 ; SSE41:       # BB#0: # %entry
    498 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    499 ; SSE41-NEXT:    retq
    500 ;
    501 ; AVX-LABEL: load_zext_2i8_to_2i64:
    502 ; AVX:       # BB#0: # %entry
    503 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    504 ; AVX-NEXT:    retq
    505 entry:
        ; The extend source comes straight from memory rather than from a register.
    506  %X = load <2 x i8>, <2 x i8>* %ptr
    507  %Y = zext <2 x i8> %X to <2 x i64>
    508  ret <2 x i64> %Y
    509 }
    510 
    511 define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
        ; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i32>.
        ; Per the checks below, the SSE2 and SSSE3 runs load with movd and then
        ; unpack twice against a zeroed register (punpcklbw, then punpcklwd); the
        ; SSE4.1 run and the runs sharing the AVX prefix fold the load into
        ; pmovzxbd / vpmovzxbd (mem operand).
    512 ; SSE2-LABEL: load_zext_4i8_to_4i32:
    513 ; SSE2:       # BB#0: # %entry
    514 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    515 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    516 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    517 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    518 ; SSE2-NEXT:    retq
    519 ;
    520 ; SSSE3-LABEL: load_zext_4i8_to_4i32:
    521 ; SSSE3:       # BB#0: # %entry
    522 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    523 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    524 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    525 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    526 ; SSSE3-NEXT:    retq
    527 ;
    528 ; SSE41-LABEL: load_zext_4i8_to_4i32:
    529 ; SSE41:       # BB#0: # %entry
    530 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    531 ; SSE41-NEXT:    retq
    532 ;
    533 ; AVX-LABEL: load_zext_4i8_to_4i32:
    534 ; AVX:       # BB#0: # %entry
    535 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    536 ; AVX-NEXT:    retq
    537 entry:
        ; The extend source comes straight from memory rather than from a register.
    538  %X = load <4 x i8>, <4 x i8>* %ptr
    539  %Y = zext <4 x i8> %X to <4 x i32>
    540  ret <4 x i32> %Y
    541 }
    542 
    543 define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
        ; Loads a <4 x i8> from %ptr and zero-extends it to <4 x i64> (a 256-bit
        ; result). Per the checks below, the SSE2 run loads with movd and unpacks
        ; against zero, splitting the last step into punpckldq / punpckhdq; the
        ; SSSE3 run loads with movd and uses one pshufb per output half; the SSE4.1
        ; and AVX1 runs issue two memory-operand byte-to-qword extends (AVX1 then
        ; joins them with vinsertf128); AVX2 and AVX512 use one ymm vpmovzxbq
        ; from memory.
        ; NOTE(review): the two memory-operand extends print identically in the
        ; SSE4.1 and AVX1 checks because the leading regex swallows the operand
        ; text; presumably they read different addresses - confirm against the
        ; raw llc output.
    544 ; SSE2-LABEL: load_zext_4i8_to_4i64:
    545 ; SSE2:       # BB#0: # %entry
    546 ; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    547 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    548 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    549 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    550 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    551 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    552 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    553 ; SSE2-NEXT:    retq
    554 ;
    555 ; SSSE3-LABEL: load_zext_4i8_to_4i64:
    556 ; SSSE3:       # BB#0: # %entry
    557 ; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    558 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    559 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    560 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
    561 ; SSSE3-NEXT:    retq
    562 ;
    563 ; SSE41-LABEL: load_zext_4i8_to_4i64:
    564 ; SSE41:       # BB#0: # %entry
    565 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    566 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    567 ; SSE41-NEXT:    retq
    568 ;
    569 ; AVX1-LABEL: load_zext_4i8_to_4i64:
    570 ; AVX1:       # BB#0: # %entry
    571 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    572 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    573 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    574 ; AVX1-NEXT:    retq
    575 ;
    576 ; AVX2-LABEL: load_zext_4i8_to_4i64:
    577 ; AVX2:       # BB#0: # %entry
    578 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    579 ; AVX2-NEXT:    retq
    580 ;
    581 ; AVX512-LABEL: load_zext_4i8_to_4i64:
    582 ; AVX512:       # BB#0: # %entry
    583 ; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    584 ; AVX512-NEXT:    retq
    585 entry:
        ; The extend source comes straight from memory rather than from a register.
    586  %X = load <4 x i8>, <4 x i8>* %ptr
    587  %Y = zext <4 x i8> %X to <4 x i64>
    588  ret <4 x i64> %Y
    589 }
    590 
    ; load_zext_8i8_to_8i16: loads a <8 x i8> from %ptr and zero-extends it to <8 x i16>.
    ; The result fits one xmm register on every prefix. SSE2 and SSSE3 are expected to
    ; load 64 bits with movq and interleave with a zeroed register (punpcklbw); SSE41 and
    ; all AVX variants (shared AVX prefix) are expected to fold the load into pmovzxbw.
    591 define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
    592 ; SSE2-LABEL: load_zext_8i8_to_8i16:
    593 ; SSE2:       # BB#0: # %entry
    594 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    595 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    596 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    597 ; SSE2-NEXT:    retq
    598 ;
    599 ; SSSE3-LABEL: load_zext_8i8_to_8i16:
    600 ; SSSE3:       # BB#0: # %entry
    601 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    602 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    603 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    604 ; SSSE3-NEXT:    retq
    605 ;
    606 ; SSE41-LABEL: load_zext_8i8_to_8i16:
    607 ; SSE41:       # BB#0: # %entry
    608 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    609 ; SSE41-NEXT:    retq
    610 ;
    611 ; AVX-LABEL: load_zext_8i8_to_8i16:
    612 ; AVX:       # BB#0: # %entry
    613 ; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    614 ; AVX-NEXT:    retq
    615 entry:
    616  %X = load <8 x i8>, <8 x i8>* %ptr
    617  %Y = zext <8 x i8> %X to <8 x i16>
    618  ret <8 x i16> %Y
    619 }
    620 
    ; load_zext_8i8_to_8i32: loads a <8 x i8> from %ptr and zero-extends it to <8 x i32>.
    ; Expected lowering per the checks below: SSE2 and SSSE3 widen bytes to words with
    ; punpcklbw against zero, then split into low/high dword halves (punpcklwd into xmm0,
    ; punpckhwd into xmm1); SSE41 uses two pmovzxbd loads; AVX1 joins two vpmovzxbd
    ; halves with vinsertf128; AVX2 and AVX512 use a single ymm vpmovzxbd from memory.
    621 define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
    622 ; SSE2-LABEL: load_zext_8i8_to_8i32:
    623 ; SSE2:       # BB#0: # %entry
    624 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    625 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    626 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    627 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    628 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    629 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    630 ; SSE2-NEXT:    retq
    631 ;
    632 ; SSSE3-LABEL: load_zext_8i8_to_8i32:
    633 ; SSSE3:       # BB#0: # %entry
    634 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    635 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    636 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    637 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    638 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    639 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    640 ; SSSE3-NEXT:    retq
    641 ;
    642 ; SSE41-LABEL: load_zext_8i8_to_8i32:
    643 ; SSE41:       # BB#0: # %entry
    644 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    645 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    646 ; SSE41-NEXT:    retq
    647 ;
    648 ; AVX1-LABEL: load_zext_8i8_to_8i32:
    649 ; AVX1:       # BB#0: # %entry
    650 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    651 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    652 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    653 ; AVX1-NEXT:    retq
    654 ;
    655 ; AVX2-LABEL: load_zext_8i8_to_8i32:
    656 ; AVX2:       # BB#0: # %entry
    657 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    658 ; AVX2-NEXT:    retq
    659 ;
    660 ; AVX512-LABEL: load_zext_8i8_to_8i32:
    661 ; AVX512:       # BB#0: # %entry
    662 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    663 ; AVX512-NEXT:    retq
    664 entry:
    665  %X = load <8 x i8>, <8 x i8>* %ptr
    666  %Y = zext <8 x i8> %X to <8 x i32>
    667  ret <8 x i32> %Y
    668 }
    669 
    ; load_zext_16i8_to_8i32: loads a full <16 x i8> from %ptr, keeps only its low eight
    ; bytes via a shufflevector, and zero-extends those to <8 x i32>.
    ; Expected lowering per the checks below: SSE2, SSSE3, SSE41 and AVX1 first load the
    ; whole 16-byte vector into a register (movdqa/vmovdqa from (%rdi)) and extend from
    ; there (SSE41/AVX1 use pshufd to bring bytes 4-7 into position for the second
    ; pmovzxbd), while AVX2 and AVX512 use a single ymm vpmovzxbd from memory.
    670 define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
    671 ; SSE2-LABEL: load_zext_16i8_to_8i32:
    672 ; SSE2:       # BB#0: # %entry
    673 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    674 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    675 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    676 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    677 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    678 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    679 ; SSE2-NEXT:    retq
    680 ;
    681 ; SSSE3-LABEL: load_zext_16i8_to_8i32:
    682 ; SSSE3:       # BB#0: # %entry
    683 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    684 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    685 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    686 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    687 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    688 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    689 ; SSSE3-NEXT:    retq
    690 ;
    691 ; SSE41-LABEL: load_zext_16i8_to_8i32:
    692 ; SSE41:       # BB#0: # %entry
    693 ; SSE41-NEXT:    movdqa (%rdi), %xmm1
    694 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
    695 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
    696 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
    697 ; SSE41-NEXT:    retq
    698 ;
    699 ; AVX1-LABEL: load_zext_16i8_to_8i32:
    700 ; AVX1:       # BB#0: # %entry
    701 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
    702 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    703 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    704 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    705 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    706 ; AVX1-NEXT:    retq
    707 ;
    708 ; AVX2-LABEL: load_zext_16i8_to_8i32:
    709 ; AVX2:       # BB#0: # %entry
    710 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    711 ; AVX2-NEXT:    retq
    712 ;
    713 ; AVX512-LABEL: load_zext_16i8_to_8i32:
    714 ; AVX512:       # BB#0: # %entry
    715 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    716 ; AVX512-NEXT:    retq
    717 entry:
    718  %X = load <16 x i8>, <16 x i8>* %ptr
    ; Mask <0..7> extracts the low eight bytes of %X; the upper eight are discarded.
    719  %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    720  %Z = zext <8 x i8> %Y to <8 x i32>
    721  ret <8 x i32> %Z
    722 }
    723 
    ; load_zext_8i8_to_8i64: loads a <8 x i8> from %ptr and zero-extends it to <8 x i64>.
    ; The 512-bit result is expected in four xmm registers (xmm0-xmm3) for the SSE
    ; prefixes, two ymm registers for AVX1 and AVX2, and a single zmm0 for AVX512.
    ; SSE2 builds it with punpck* against a zeroed xmm4; SSSE3 applies two pshufb masks
    ; (held in xmm4/xmm5) to the low and high dword of the loaded data; SSE41 and the
    ; AVX variants use (v)pmovzxbq loads.
    724 define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
    725 ; SSE2-LABEL: load_zext_8i8_to_8i64:
    726 ; SSE2:       # BB#0: # %entry
    727 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    728 ; SSE2-NEXT:    pxor %xmm4, %xmm4
    729 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
    730 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
    731 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
    732 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    733 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
    734 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
    735 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
    736 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
    737 ; SSE2-NEXT:    movdqa %xmm3, %xmm2
    738 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
    739 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
    740 ; SSE2-NEXT:    retq
    741 ;
    742 ; SSSE3-LABEL: load_zext_8i8_to_8i64:
    743 ; SSSE3:       # BB#0: # %entry
    744 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    745 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
    746 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    747 ; SSSE3-NEXT:    pshufb %xmm4, %xmm0
    748 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
    749 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
    750 ; SSSE3-NEXT:    pshufb %xmm5, %xmm1
    751 ; SSSE3-NEXT:    movdqa %xmm3, %xmm2
    752 ; SSSE3-NEXT:    pshufb %xmm4, %xmm2
    753 ; SSSE3-NEXT:    pshufb %xmm5, %xmm3
    754 ; SSSE3-NEXT:    retq
    755 ;
    756 ; SSE41-LABEL: load_zext_8i8_to_8i64:
    757 ; SSE41:       # BB#0: # %entry
    758 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    759 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    760 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    761 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    762 ; SSE41-NEXT:    retq
    763 ;
    764 ; AVX1-LABEL: load_zext_8i8_to_8i64:
    765 ; AVX1:       # BB#0: # %entry
    766 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    767 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    768 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    769 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    770 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    771 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
    772 ; AVX1-NEXT:    retq
    773 ;
    774 ; AVX2-LABEL: load_zext_8i8_to_8i64:
    775 ; AVX2:       # BB#0: # %entry
    776 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    777 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    778 ; AVX2-NEXT:    retq
    779 ;
    780 ; AVX512-LABEL: load_zext_8i8_to_8i64:
    781 ; AVX512:       # BB#0: # %entry
    782 ; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
    783 ; AVX512-NEXT:    retq
    784 entry:
    785  %X = load <8 x i8>, <8 x i8>* %ptr
    786  %Y = zext <8 x i8> %X to <8 x i64>
    787  ret <8 x i64> %Y
    788 }
    789 
    ; load_zext_16i8_to_16i16: loads a <16 x i8> from %ptr and zero-extends it to
    ; <16 x i16>.
    ; Expected lowering per the checks below: SSE2 and SSSE3 load with movdqa and split
    ; the bytes into low/high halves with punpcklbw/punpckhbw against a zeroed register;
    ; SSE41 uses two pmovzxbw loads; AVX1 joins two vpmovzxbw halves with vinsertf128;
    ; AVX2 and AVX512 use a single ymm vpmovzxbw from memory.
    790 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
    791 ; SSE2-LABEL: load_zext_16i8_to_16i16:
    792 ; SSE2:       # BB#0: # %entry
    793 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    794 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    795 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    796 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    797 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
    798 ; SSE2-NEXT:    retq
    799 ;
    800 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
    801 ; SSSE3:       # BB#0: # %entry
    802 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    803 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    804 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    805 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    806 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
    807 ; SSSE3-NEXT:    retq
    808 ;
    809 ; SSE41-LABEL: load_zext_16i8_to_16i16:
    810 ; SSE41:       # BB#0: # %entry
    811 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    812 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    813 ; SSE41-NEXT:    retq
    814 ;
    815 ; AVX1-LABEL: load_zext_16i8_to_16i16:
    816 ; AVX1:       # BB#0: # %entry
    817 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    818 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    819 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    820 ; AVX1-NEXT:    retq
    821 ;
    822 ; AVX2-LABEL: load_zext_16i8_to_16i16:
    823 ; AVX2:       # BB#0: # %entry
    824 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    825 ; AVX2-NEXT:    retq
    826 ;
    827 ; AVX512-LABEL: load_zext_16i8_to_16i16:
    828 ; AVX512:       # BB#0: # %entry
    829 ; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    830 ; AVX512-NEXT:    retq
    831 entry:
    832  %X = load <16 x i8>, <16 x i8>* %ptr
    833  %Y = zext <16 x i8> %X to <16 x i16>
    834  ret <16 x i16> %Y
    835 }
    836 
    ; load_zext_2i16_to_2i64: loads a <2 x i16> from %ptr and zero-extends it to <2 x i64>.
    ; The result fits one xmm register on every prefix. SSE2 and SSSE3 are expected to
    ; load 32 bits with movd and widen in two steps against a zeroed register
    ; (punpcklwd, then punpckldq); SSE41 and all AVX variants (shared AVX prefix) are
    ; expected to fold the load into pmovzxwq.
    837 define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
    838 ; SSE2-LABEL: load_zext_2i16_to_2i64:
    839 ; SSE2:       # BB#0: # %entry
    840 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    841 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    842 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    843 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    844 ; SSE2-NEXT:    retq
    845 ;
    846 ; SSSE3-LABEL: load_zext_2i16_to_2i64:
    847 ; SSSE3:       # BB#0: # %entry
    848 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    849 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    850 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    851 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    852 ; SSSE3-NEXT:    retq
    853 ;
    854 ; SSE41-LABEL: load_zext_2i16_to_2i64:
    855 ; SSE41:       # BB#0: # %entry
    856 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    857 ; SSE41-NEXT:    retq
    858 ;
    859 ; AVX-LABEL: load_zext_2i16_to_2i64:
    860 ; AVX:       # BB#0: # %entry
    861 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    862 ; AVX-NEXT:    retq
    863 entry:
    864  %X = load <2 x i16>, <2 x i16>* %ptr
    865  %Y = zext <2 x i16> %X to <2 x i64>
    866  ret <2 x i64> %Y
    867 }
    868 
    ; load_zext_4i16_to_4i32: loads a <4 x i16> from %ptr and zero-extends it to <4 x i32>.
    ; The result fits one xmm register on every prefix. SSE2 and SSSE3 are expected to
    ; load 64 bits with movq and interleave with a zeroed register (punpcklwd); SSE41
    ; and all AVX variants (shared AVX prefix) are expected to fold the load into
    ; pmovzxwd.
    869 define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
    870 ; SSE2-LABEL: load_zext_4i16_to_4i32:
    871 ; SSE2:       # BB#0: # %entry
    872 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    873 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    874 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    875 ; SSE2-NEXT:    retq
    876 ;
    877 ; SSSE3-LABEL: load_zext_4i16_to_4i32:
    878 ; SSSE3:       # BB#0: # %entry
    879 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    880 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
    881 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    882 ; SSSE3-NEXT:    retq
    883 ;
    884 ; SSE41-LABEL: load_zext_4i16_to_4i32:
    885 ; SSE41:       # BB#0: # %entry
    886 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    887 ; SSE41-NEXT:    retq
    888 ;
    889 ; AVX-LABEL: load_zext_4i16_to_4i32:
    890 ; AVX:       # BB#0: # %entry
    891 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    892 ; AVX-NEXT:    retq
    893 entry:
    894  %X = load <4 x i16>, <4 x i16>* %ptr
    895  %Y = zext <4 x i16> %X to <4 x i32>
    896  ret <4 x i32> %Y
    897 }
    898 
    ; load_zext_4i16_to_4i64: loads a <4 x i16> from %ptr and zero-extends it to <4 x i64>.
    ; Expected lowering per the checks below: SSE2 and SSSE3 widen words to dwords with
    ; punpcklwd against zero, then split into low/high qword halves (punpckldq into
    ; xmm0, punpckhdq into xmm1); SSE41 uses two pmovzxwq loads; AVX1 joins two
    ; vpmovzxwq halves with vinsertf128; AVX2 and AVX512 use a single ymm vpmovzxwq
    ; from memory.
    899 define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
    900 ; SSE2-LABEL: load_zext_4i16_to_4i64:
    901 ; SSE2:       # BB#0: # %entry
    902 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    903 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    904 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    905 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    906 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    907 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    908 ; SSE2-NEXT:    retq
    909 ;
    910 ; SSSE3-LABEL: load_zext_4i16_to_4i64:
    911 ; SSSE3:       # BB#0: # %entry
    912 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    913 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    914 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    915 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    916 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    917 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    918 ; SSSE3-NEXT:    retq
    919 ;
    920 ; SSE41-LABEL: load_zext_4i16_to_4i64:
    921 ; SSE41:       # BB#0: # %entry
    922 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    923 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    924 ; SSE41-NEXT:    retq
    925 ;
    926 ; AVX1-LABEL: load_zext_4i16_to_4i64:
    927 ; AVX1:       # BB#0: # %entry
    928 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    929 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    930 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    931 ; AVX1-NEXT:    retq
    932 ;
    933 ; AVX2-LABEL: load_zext_4i16_to_4i64:
    934 ; AVX2:       # BB#0: # %entry
    935 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    936 ; AVX2-NEXT:    retq
    937 ;
    938 ; AVX512-LABEL: load_zext_4i16_to_4i64:
    939 ; AVX512:       # BB#0: # %entry
    940 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    941 ; AVX512-NEXT:    retq
    942 entry:
    943  %X = load <4 x i16>, <4 x i16>* %ptr
    944  %Y = zext <4 x i16> %X to <4 x i64>
    945  ret <4 x i64> %Y
    946 }
    947 
    ; load_zext_8i16_to_8i32: loads a <8 x i16> from %ptr and zero-extends it to <8 x i32>.
    ; Expected lowering per the checks below: SSE2 and SSSE3 load with movdqa and split
    ; the words into low/high halves with punpcklwd/punpckhwd against a zeroed register;
    ; SSE41 uses two pmovzxwd loads; AVX1 joins two vpmovzxwd halves with vinsertf128;
    ; AVX2 and AVX512 use a single ymm vpmovzxwd from memory.
    948 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
    949 ; SSE2-LABEL: load_zext_8i16_to_8i32:
    950 ; SSE2:       # BB#0: # %entry
    951 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    952 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    953 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
    954 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    955 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    956 ; SSE2-NEXT:    retq
    957 ;
    958 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
    959 ; SSSE3:       # BB#0: # %entry
    960 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    961 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    962 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    963 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    964 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    965 ; SSSE3-NEXT:    retq
    966 ;
    967 ; SSE41-LABEL: load_zext_8i16_to_8i32:
    968 ; SSE41:       # BB#0: # %entry
    969 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    970 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    971 ; SSE41-NEXT:    retq
    972 ;
    973 ; AVX1-LABEL: load_zext_8i16_to_8i32:
    974 ; AVX1:       # BB#0: # %entry
    975 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    976 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    977 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    978 ; AVX1-NEXT:    retq
    979 ;
    980 ; AVX2-LABEL: load_zext_8i16_to_8i32:
    981 ; AVX2:       # BB#0: # %entry
    982 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    983 ; AVX2-NEXT:    retq
    984 ;
    985 ; AVX512-LABEL: load_zext_8i16_to_8i32:
    986 ; AVX512:       # BB#0: # %entry
    987 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    988 ; AVX512-NEXT:    retq
    989 entry:
    990  %X = load <8 x i16>, <8 x i16>* %ptr
    991  %Y = zext <8 x i16> %X to <8 x i32>
    992  ret <8 x i32> %Y
    993 }
    994 
    ; load_zext_2i32_to_2i64: loads a <2 x i32> from %ptr and zero-extends it to <2 x i64>.
    ; The result fits one xmm register on every prefix. SSE2 and SSSE3 are expected to
    ; load 64 bits with movq and interleave with a zeroed register (punpckldq); SSE41
    ; and all AVX variants (shared AVX prefix) are expected to fold the load into
    ; pmovzxdq.
    995 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
    996 ; SSE2-LABEL: load_zext_2i32_to_2i64:
    997 ; SSE2:       # BB#0: # %entry
    998 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    999 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1000 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1001 ; SSE2-NEXT:    retq
   1002 ;
   1003 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
   1004 ; SSSE3:       # BB#0: # %entry
   1005 ; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
   1006 ; SSSE3-NEXT:    pxor %xmm1, %xmm1
   1007 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1008 ; SSSE3-NEXT:    retq
   1009 ;
   1010 ; SSE41-LABEL: load_zext_2i32_to_2i64:
   1011 ; SSE41:       # BB#0: # %entry
   1012 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
   1013 ; SSE41-NEXT:    retq
   1014 ;
   1015 ; AVX-LABEL: load_zext_2i32_to_2i64:
   1016 ; AVX:       # BB#0: # %entry
   1017 ; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
   1018 ; AVX-NEXT:    retq
   1019 entry:
   1020  %X = load <2 x i32>, <2 x i32>* %ptr
   1021  %Y = zext <2 x i32> %X to <2 x i64>
   1022  ret <2 x i64> %Y
   1023 }
   1024 
   ; load_zext_4i32_to_4i64: loads a <4 x i32> from %ptr and zero-extends it to <4 x i64>.
   ; Expected lowering per the checks below: SSE2 and SSSE3 load with movdqa and split
   ; the dwords into low/high halves with punpckldq/punpckhdq against a zeroed register;
   ; SSE41 uses two pmovzxdq loads; AVX1 joins two vpmovzxdq halves with vinsertf128;
   ; AVX2 and AVX512 use a single ymm vpmovzxdq from memory.
   1025 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
   1026 ; SSE2-LABEL: load_zext_4i32_to_4i64:
   1027 ; SSE2:       # BB#0: # %entry
   1028 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
   1029 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1030 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1031 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1032 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1033 ; SSE2-NEXT:    retq
   1034 ;
   1035 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
   1036 ; SSSE3:       # BB#0: # %entry
   1037 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
   1038 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1039 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1040 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1041 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1042 ; SSSE3-NEXT:    retq
   1043 ;
   1044 ; SSE41-LABEL: load_zext_4i32_to_4i64:
   1045 ; SSE41:       # BB#0: # %entry
   1046 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
   1047 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
   1048 ; SSE41-NEXT:    retq
   1049 ;
   1050 ; AVX1-LABEL: load_zext_4i32_to_4i64:
   1051 ; AVX1:       # BB#0: # %entry
   1052 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
   1053 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
   1054 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1055 ; AVX1-NEXT:    retq
   1056 ;
   1057 ; AVX2-LABEL: load_zext_4i32_to_4i64:
   1058 ; AVX2:       # BB#0: # %entry
   1059 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1060 ; AVX2-NEXT:    retq
   1061 ;
   1062 ; AVX512-LABEL: load_zext_4i32_to_4i64:
   1063 ; AVX512:       # BB#0: # %entry
   1064 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1065 ; AVX512-NEXT:    retq
   1066 entry:
   1067  %X = load <4 x i32>, <4 x i32>* %ptr
   1068  %Y = zext <4 x i32> %X to <4 x i64>
   1069  ret <4 x i64> %Y
   1070 }
   1071 
   ; zext_8i8_to_8i32: zero-extends a <8 x i8> passed by value (not loaded) to <8 x i32>.
   ; Every prefix is expected to start by masking the incoming xmm0 with a RIP-relative
   ; constant (pand/vpand) and then to widen 16-bit lanes to 32-bit lanes (punpck*wd or
   ; pmovzxwd), i.e. the argument is handled as eight word-sized lanes.
   ; NOTE(review): presumably the <8 x i8> argument is promoted to <8 x i16> by type
   ; legalization and the mask clears the undefined upper byte of each lane - confirm.
   1072 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
   1073 ; SSE2-LABEL: zext_8i8_to_8i32:
   1074 ; SSE2:       # BB#0: # %entry
   1075 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1076 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
   1077 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1078 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1079 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1080 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1081 ; SSE2-NEXT:    retq
   1082 ;
   1083 ; SSSE3-LABEL: zext_8i8_to_8i32:
   1084 ; SSSE3:       # BB#0: # %entry
   1085 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1086 ; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
   1087 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1088 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1089 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1090 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1091 ; SSSE3-NEXT:    retq
   1092 ;
   1093 ; SSE41-LABEL: zext_8i8_to_8i32:
   1094 ; SSE41:       # BB#0: # %entry
   1095 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1096 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
   1097 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1098 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
   1099 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1100 ; SSE41-NEXT:    retq
   1101 ;
   1102 ; AVX1-LABEL: zext_8i8_to_8i32:
   1103 ; AVX1:       # BB#0: # %entry
   1104 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1105 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1106 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1107 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1108 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1109 ; AVX1-NEXT:    retq
   1110 ;
   1111 ; AVX2-LABEL: zext_8i8_to_8i32:
   1112 ; AVX2:       # BB#0: # %entry
   1113 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1114 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1115 ; AVX2-NEXT:    retq
   1116 ;
   1117 ; AVX512-LABEL: zext_8i8_to_8i32:
   1118 ; AVX512:       # BB#0: # %entry
   1119 ; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1120 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1121 ; AVX512-NEXT:    retq
   1122 entry:
   1123   %t = zext <8 x i8> %z to <8 x i32>
   1124   ret <8 x i32> %t
   1125 }
   1126 
   ; shuf_zext_8i16_to_8i32: expresses a zero-extension of <8 x i16> to <8 x i32> as a
   ; shufflevector that interleaves %A with zeros, followed by a bitcast, instead of a
   ; zext instruction.
   ; The checks below expect the same lowering a plain zero-extension gets: punpcklwd
   ; and punpckhwd against a zeroed register on SSE2/SSSE3, pmovzxwd for the low half
   ; plus punpckhwd for the high half on SSE41 and AVX1, and a single ymm vpmovzxwd on
   ; AVX2 and AVX512.
   1127 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
   1128 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
   1129 ; SSE2:       # BB#0: # %entry
   1130 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1131 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1132 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1133 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1134 ; SSE2-NEXT:    retq
   1135 ;
   1136 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
   1137 ; SSSE3:       # BB#0: # %entry
   1138 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1139 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1140 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1141 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1142 ; SSSE3-NEXT:    retq
   1143 ;
   1144 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
   1145 ; SSE41:       # BB#0: # %entry
   1146 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1147 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1148 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
   1149 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1150 ; SSE41-NEXT:    retq
   1151 ;
   1152 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
   1153 ; AVX1:       # BB#0: # %entry
   1154 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1155 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1156 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1157 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1158 ; AVX1-NEXT:    retq
   1159 ;
   1160 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
   1161 ; AVX2:       # BB#0: # %entry
   1162 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1163 ; AVX2-NEXT:    retq
   1164 ;
   1165 ; AVX512-LABEL: shuf_zext_8i16_to_8i32:
   1166 ; AVX512:       # BB#0: # %entry
   1167 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1168 ; AVX512-NEXT:    retq
   1169 entry:
   ; Mask index 8 selects element 0 of the zeroinitializer operand, so each i16 of %A
   ; is followed by a zero i16 in the <16 x i16> result.
   1170   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
   ; Reinterpret each (element, zero) pair of i16 lanes as one i32 lane.
   1171   %Z = bitcast <16 x i16> %B to <8 x i32>
   1172   ret <8 x i32> %Z
   1173 }
   1174 
   1175 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extension spelled as a shuffle: every i32 lane of %A (mask
        ; indices 0-3) is followed by a lane from the zeroinitializer operand
        ; (mask index 4), and the 8 x i32 result is bitcast to 4 x i64.
        ; The AVX2 and AVX512 expectations below are a single vpmovzxdq.
   1176 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
   1177 ; SSE2:       # BB#0: # %entry
   1178 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1179 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1180 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1181 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1182 ; SSE2-NEXT:    retq
   1183 ;
   1184 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
   1185 ; SSSE3:       # BB#0: # %entry
   1186 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1187 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1188 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
   1189 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1190 ; SSSE3-NEXT:    retq
   1191 ;
   1192 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
   1193 ; SSE41:       # BB#0: # %entry
   1194 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1195 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1196 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
   1197 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
   1198 ; SSE41-NEXT:    retq
   1199 ;
   1200 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
   1201 ; AVX1:       # BB#0: # %entry
   1202 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
   1203 ; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1204 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
   1205 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
   1206 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1207 ; AVX1-NEXT:    retq
   1208 ;
   1209 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
   1210 ; AVX2:       # BB#0: # %entry
   1211 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1212 ; AVX2-NEXT:    retq
   1213 ;
   1214 ; AVX512-LABEL: shuf_zext_4i32_to_4i64:
   1215 ; AVX512:       # BB#0: # %entry
   1216 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1217 ; AVX512-NEXT:    retq
   1218 entry:
   1219   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
   1220   %Z = bitcast <8 x i32> %B to <4 x i64>
   1221   ret <4 x i64> %Z
   1222 }
   1223 
   1224 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
        ; Zero-extension spelled as a shuffle: every i8 lane of %A (mask
        ; indices 0-7) is followed by three lanes from the zeroinitializer
        ; operand (mask index 8), and the 32 x i8 result is bitcast to
        ; 8 x i32. Unlike the neighbouring tests this function declares no
        ; attributes. The expectations first gather the even bytes of xmm0
        ; (pand+packuswb or pshufb [0,2,4,...,14]) before extending.
        ; NOTE(review): presumably the <8 x i8> argument arrives widened to
        ; 16-bit lanes; confirm against the type legalizer.
   1225 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
   1226 ; SSE2:       # BB#0: # %entry
   1227 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1228 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
   1229 ; SSE2-NEXT:    packuswb %xmm1, %xmm1
   1230 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1231 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1232 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1233 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1234 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1235 ; SSE2-NEXT:    retq
   1236 ;
   1237 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
   1238 ; SSSE3:       # BB#0: # %entry
   1239 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1240 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero
   1241 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1242 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1243 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1244 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1245 ; SSSE3-NEXT:    retq
   1246 ;
   1247 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
   1248 ; SSE41:       # BB#0: # %entry
   1249 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1250 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1251 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1252 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1253 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1254 ; SSE41-NEXT:    retq
   1255 ;
   1256 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
   1257 ; AVX1:       # BB#0: # %entry
   1258 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1259 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1260 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
   1261 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1262 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1263 ; AVX1-NEXT:    retq
   1264 ;
   1265 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
   1266 ; AVX2:       # BB#0: # %entry
   1267 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1268 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1269 ; AVX2-NEXT:    retq
   1270 ;
   1271 ; AVX512-LABEL: shuf_zext_8i8_to_8i32:
   1272 ; AVX512:       # BB#0: # %entry
   1273 ; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
   1274 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1275 ; AVX512-NEXT:    retq
   1276 entry:
   1277   %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
   1278   %Z = bitcast <32 x i8> %B to <8 x i32>
   1279   ret <8 x i32> %Z
   1280 }
   1281 
   1282 define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane: bytes 6 and 7 of %A
        ; are each followed by seven lanes from the zeroinitializer operand
        ; (mask index 16), and the 16 x i8 result is bitcast to 2 x i64.
        ; The SSE4.1 and AVX expectations shift each quadword right by 48
        ; bits and then use pmovzxbq; SSSE3 uses a single pshufb.
   1283 ; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1284 ; SSE2:       # BB#0: # %entry
   1285 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1286 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1287 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1288 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1289 ; SSE2-NEXT:    retq
   1290 ;
   1291 ; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1292 ; SSSE3:       # BB#0: # %entry
   1293 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   1294 ; SSSE3-NEXT:    retq
   1295 ;
   1296 ; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1297 ; SSE41:       # BB#0: # %entry
   1298 ; SSE41-NEXT:    psrlq $48, %xmm0
   1299 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1300 ; SSE41-NEXT:    retq
   1301 ;
   1302 ; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
   1303 ; AVX:       # BB#0: # %entry
   1304 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
   1305 ; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1306 ; AVX-NEXT:    retq
   1307 entry:
   1308   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   1309   %Z = bitcast <16 x i8> %B to <2 x i64>
   1310   ret <2 x i64> %Z
   1311 }
   1312 
   1313 define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane: bytes 11, 12, 13 and
        ; 14 of %A are each followed by seven lanes from the zeroinitializer
        ; operand (mask index 16), and the 32 x i8 result is bitcast to
        ; 4 x i64. The AVX2 and AVX512 expectations shift byte 11 down to
        ; position 0 with vpsrldq and then use one vpmovzxbq.
   1314 ; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1315 ; SSE2:       # BB#0: # %entry
   1316 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1317 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
   1318 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1319 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
   1320 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1321 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1322 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1323 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1324 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1325 ; SSE2-NEXT:    retq
   1326 ;
   1327 ; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1328 ; SSSE3:       # BB#0: # %entry
   1329 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1330 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
   1331 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
   1332 ; SSSE3-NEXT:    retq
   1333 ;
   1334 ; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1335 ; SSE41:       # BB#0: # %entry
   1336 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1337 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1338 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1339 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1340 ; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1341 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1342 ; SSE41-NEXT:    retq
   1343 ;
   1344 ; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1345 ; AVX1:       # BB#0: # %entry
   1346 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1347 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
   1348 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1349 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1350 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1351 ; AVX1-NEXT:    retq
   1352 ;
   1353 ; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1354 ; AVX2:       # BB#0: # %entry
   1355 ; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1356 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1357 ; AVX2-NEXT:    retq
   1358 ;
   1359 ; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
   1360 ; AVX512:       # BB#0: # %entry
   1361 ; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1362 ; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1363 ; AVX512-NEXT:    retq
   1364 entry:
   1365   %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   1366   %Z = bitcast <32 x i8> %B to <4 x i64>
   1367   ret <4 x i64> %Z
   1368 }
   1369 
   1370 define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane: i16 lanes 3 and 4 of
        ; %A are each followed by three lanes from the zeroinitializer
        ; operand (mask index 8), and the 8 x i16 result is bitcast to
        ; 2 x i64. The SSE4.1 and AVX expectations shift the register right
        ; by 6 bytes (psrldq) and then use pmovzxwq.
        ; NOTE(review): "offset6" presumably names that byte offset of
        ; lane 3, not an element index; confirm the naming scheme.
   1371 ; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1372 ; SSE2:       # BB#0: # %entry
   1373 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1374 ; SSE2-NEXT:    pxor %xmm1, %xmm1
   1375 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1376 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1377 ; SSE2-NEXT:    retq
   1378 ;
   1379 ; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1380 ; SSSE3:       # BB#0: # %entry
   1381 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
   1382 ; SSSE3-NEXT:    retq
   1383 ;
   1384 ; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1385 ; SSE41:       # BB#0: # %entry
   1386 ; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1387 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1388 ; SSE41-NEXT:    retq
   1389 ;
   1390 ; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
   1391 ; AVX:       # BB#0: # %entry
   1392 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1393 ; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1394 ; AVX-NEXT:    retq
   1395 entry:
   1396   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
   1397   %Z = bitcast <8 x i16> %B to <2 x i64>
   1398   ret <2 x i64> %Z
   1399 }
   1400 
   1401 define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane: i16 lanes 2, 3, 4
        ; and 5 of %A are each followed by three lanes from the
        ; zeroinitializer operand (mask index 8), and the 16 x i16 result is
        ; bitcast to 4 x i64. The AVX2 and AVX512 expectations move the
        ; wanted lanes to the bottom with vpshufd [1,2,2,3] and then use one
        ; vpmovzxwq.
   1402 ; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1403 ; SSE2:       # BB#0: # %entry
   1404 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1405 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1406 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1407 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1408 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1409 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1410 ; SSE2-NEXT:    retq
   1411 ;
   1412 ; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1413 ; SSSE3:       # BB#0: # %entry
   1414 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1415 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1416 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1417 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1418 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1419 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
   1420 ; SSSE3-NEXT:    retq
   1421 ;
   1422 ; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1423 ; SSE41:       # BB#0: # %entry
   1424 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1425 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1426 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1427 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1428 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
   1429 ; SSE41-NEXT:    retq
   1430 ;
   1431 ; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1432 ; AVX1:       # BB#0: # %entry
   1433 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
   1434 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
   1435 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
   1436 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1437 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1438 ; AVX1-NEXT:    retq
   1439 ;
   1440 ; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1441 ; AVX2:       # BB#0: # %entry
   1442 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
   1443 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1444 ; AVX2-NEXT:    retq
   1445 ;
   1446 ; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
   1447 ; AVX512:       # BB#0: # %entry
   1448 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
   1449 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1450 ; AVX512-NEXT:    retq
   1451 entry:
   1452   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
   1453   %Z = bitcast <16 x i16> %B to <4 x i64>
   1454   ret <4 x i64> %Z
   1455 }
   1456 
   1457 define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane: i16 lanes 1, 2, 3
        ; and 4 of %A are each followed by a lane from the zeroinitializer
        ; operand (mask index 8), and the 8 x i16 result is bitcast to
        ; 4 x i32. Both expectation groups shift the register right by two
        ; bytes (psrldq) and then unpack the low words against zero.
   1458 ; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1459 ; SSE:       # BB#0: # %entry
   1460 ; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1461 ; SSE-NEXT:    pxor %xmm1, %xmm1
   1462 ; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1463 ; SSE-NEXT:    retq
   1464 ;
   1465 ; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
   1466 ; AVX:       # BB#0: # %entry
   1467 ; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
   1468 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1469 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1470 ; AVX-NEXT:    retq
   1471 entry:
   1472   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
   1473   %Z = bitcast <8 x i16> %B to <4 x i32>
   1474   ret <4 x i32> %Z
   1475 }
   1476 
   1476 define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extension from a non-zero starting lane with undef tail:
        ; i16 lanes 3 through 7 of %A are each followed by a lane from the
        ; zeroinitializer operand (mask index 8); the source positions of
        ; the last three pairs are undef, so only their zero upper halves
        ; are specified. The 16 x i16 result is bitcast to 8 x i32. The AVX2
        ; and AVX512 expectations shift right by 6 bytes and then use one
        ; vpmovzxwd.
   1477 ; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1478 ; SSE2:       # BB#0: # %entry
   1479 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1480 ; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1481 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1482 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1483 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1484 ; SSE2-NEXT:    retq
   1485 ;
   1486 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1487 ; SSSE3:       # BB#0: # %entry
   1488 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1489 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1490 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1491 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1492 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1493 ; SSSE3-NEXT:    retq
   1494 ;
   1495 ; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1496 ; SSE41:       # BB#0: # %entry
   1497 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1498 ; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1499 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1500 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
   1501 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1502 ; SSE41-NEXT:    retq
   1503 ;
   1504 ; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1505 ; AVX1:       # BB#0: # %entry
   1506 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
   1507 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1508 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1509 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1510 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1511 ; AVX1-NEXT:    retq
   1512 ;
   1513 ; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1514 ; AVX2:       # BB#0: # %entry
   1515 ; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1516 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1517 ; AVX2-NEXT:    retq
   1518 ;
   1519 ; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
   1520 ; AVX512:       # BB#0: # %entry
   1521 ; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
   1522 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1523 ; AVX512-NEXT:    retq
   1524 entry:
   1525   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
   1526   %Z = bitcast <16 x i16> %B to <8 x i32>
   1527   ret <8 x i32> %Z
   1528 }
   1530 
   1530 define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
        ; Zero-extension of the upper half of a 256-bit input: i16 lanes 8,
        ; 9, 10, 11, 12 and 14 of %A are each followed by a lane from the
        ; zeroinitializer operand (mask index 16); the source positions that
        ; would hold lanes 13 and 15 are undef. The 16 x i16 result is
        ; bitcast to 8 x i32. The AVX2 and AVX512 expectations extract the
        ; upper 128 bits and then use one vpmovzxwd.
   1531 ; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1532 ; SSE2:       # BB#0: # %entry
   1533 ; SSE2-NEXT:    pxor %xmm2, %xmm2
   1534 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
   1535 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1536 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1537 ; SSE2-NEXT:    retq
   1538 ;
   1539 ; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1540 ; SSSE3:       # BB#0: # %entry
   1541 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
   1542 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
   1543 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
   1544 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
   1545 ; SSSE3-NEXT:    retq
   1546 ;
   1547 ; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1548 ; SSE41:       # BB#0: # %entry
   1549 ; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
   1550 ; SSE41-NEXT:    pxor %xmm2, %xmm2
   1551 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
   1552 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
   1553 ; SSE41-NEXT:    movdqa %xmm2, %xmm1
   1554 ; SSE41-NEXT:    retq
   1555 ;
   1556 ; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1557 ; AVX1:       # BB#0: # %entry
   1558 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1559 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
   1560 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1561 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   1562 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1563 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1564 ; AVX1-NEXT:    retq
   1565 ;
   1566 ; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1567 ; AVX2:       # BB#0: # %entry
   1568 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   1569 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1570 ; AVX2-NEXT:    retq
   1571 ;
   1572 ; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
   1573 ; AVX512:       # BB#0: # %entry
   1574 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
   1575 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1576 ; AVX512-NEXT:    retq
   1577 entry:
   1578   %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
   1579   %Z = bitcast <16 x i16> %B to <8 x i32>
   1580   ret <8 x i32> %Z
   1581 }
   1583 
   1583 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extension of the upper half: i32 lanes 2 and 3 of %A are each
        ; followed by a lane from the zeroinitializer operand (mask index
        ; 4), and the 4 x i32 result is bitcast to 2 x i64. Both expectation
        ; groups zero a register and unpack the high dwords against it.
   1584 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
   1585 ; SSE:       # BB#0: # %entry
   1586 ; SSE-NEXT:    pxor %xmm1, %xmm1
   1587 ; SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1588 ; SSE-NEXT:    retq
   1589 ;
   1590 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
   1591 ; AVX:       # BB#0: # %entry
   1592 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1593 ; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1594 ; AVX-NEXT:    retq
   1595 entry:
   1596   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
   1597   %Z = bitcast <4 x i32> %B to <2 x i64>
   1598   ret <2 x i64> %Z
   1599 }
   1601 
   1601 define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; Zero-extension with undef source lanes at both ends: the mask is
        ; <undef,4, 2,4, 3,4, undef,4>, so i32 lanes 2 and 3 of %A land in
        ; the low halves of the second and third i64 results, the low halves
        ; of the first and last results are undef, and every high half comes
        ; from the zeroinitializer operand (mask index 4). The 8 x i32
        ; result is bitcast to 4 x i64. The AVX2 and AVX512 expectations use
        ; vpshufd [1,2,3,3] followed by one vpmovzxdq.
   1602 ; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1603 ; SSE2:       # BB#0: # %entry
   1604 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
   1605 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   1606 ; SSE2-NEXT:    pand %xmm1, %xmm0
   1607 ; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1608 ; SSE2-NEXT:    retq
   1609 ;
   1610 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1611 ; SSSE3:       # BB#0: # %entry
   1612 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
   1613 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
   1614 ; SSSE3-NEXT:    pand %xmm1, %xmm0
   1615 ; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1616 ; SSSE3-NEXT:    retq
   1617 ;
   1618 ; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1619 ; SSE41:       # BB#0: # %entry
   1620 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
   1621 ; SSE41-NEXT:    pxor %xmm0, %xmm0
   1622 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
   1623 ; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
   1624 ; SSE41-NEXT:    retq
   1625 ;
   1626 ; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1627 ; AVX1:       # BB#0: # %entry
   1628 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
   1629 ; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1630 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
   1631 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1632 ; AVX1-NEXT:    retq
   1633 ;
   1634 ; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1635 ; AVX2:       # BB#0: # %entry
   1636 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1637 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1638 ; AVX2-NEXT:    retq
   1639 ;
   1640 ; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
   1641 ; AVX512:       # BB#0: # %entry
   1642 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1643 ; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1644 ; AVX512-NEXT:    retq
   1645 entry:
   1646   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
   1647   %Z = bitcast <8 x i32> %B to <4 x i64>
   1648   ret <4 x i64> %Z
   1649 }
   1651