; NOTE(review): code-viewer navigation breadcrumb from extraction, kept as a comment:
; Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      6 
      7 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; NOTE(review): verifies lowering of `zext <8 x i16> -> <8 x i32>` per subtarget:
        ; SSE2/SSSE3 interleave the low half with a zeroed xmm2 and mask the high half via a
        ; constant-pool pand; SSE4.1 uses pmovzxwd for the low half; AVX1 glues two xmm
        ; halves together with vinsertf128; AVX2 emits a single wide vpmovzxwd.
        ; The '# kill' lines match register-liveness annotations from the asm printer —
        ; presumably the implicit xmm0 subregister def; TODO confirm against current llc.
      8 ; SSE2-LABEL: zext_8i16_to_8i32:
      9 ; SSE2:       # BB#0: # %entry
     10 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
     11 ; SSE2-NEXT:    pxor %xmm2, %xmm2
     12 ; SSE2-NEXT:    # kill
     13 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
     14 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
     15 ; SSE2-NEXT:    pand .LCPI0_0(%rip), %xmm1
     16 ; SSE2-NEXT:    retq
     17 ;
     18 ; SSSE3-LABEL: zext_8i16_to_8i32:
     19 ; SSSE3:       # BB#0: # %entry
     20 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
     21 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
     22 ; SSSE3-NEXT:    # kill
     23 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
     24 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
     25 ; SSSE3-NEXT:    pand .LCPI0_0(%rip), %xmm1
     26 ; SSSE3-NEXT:    retq
     27 ;
     28 ; SSE41-LABEL: zext_8i16_to_8i32:
     29 ; SSE41:       # BB#0: # %entry
     30 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
     31 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
     32 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
     33 ; SSE41-NEXT:    pand .LCPI0_0(%rip), %xmm1
     34 ; SSE41-NEXT:    retq
     35 ;
     36 ; AVX1-LABEL: zext_8i16_to_8i32:
     37 ; AVX1:       # BB#0: # %entry
     38 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     39 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
     40 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     41 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     42 ; AVX1-NEXT:    retq
     43 ;
     44 ; AVX2-LABEL: zext_8i16_to_8i32:
     45 ; AVX2:       # BB#0: # %entry
     46 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
     47 ; AVX2-NEXT:    retq
     48 entry:
     49   %B = zext <8 x i16> %A to <8 x i32>
     50   ret <8 x i32>%B
     51 }
     52 
     53 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; NOTE(review): `zext <4 x i32> -> <4 x i64>`. Pre-AVX targets pshufd each pair of
        ; lanes into 64-bit slots and clear the odd 32-bit lanes with the
        ; [4294967295,4294967295] mask; AVX2 collapses to one vpmovzxdq.
        ; The SSE4.1 pand after pmovzxdq looks redundant (pmovzx already zeroes) —
        ; codegen-quality observation only, the checks just pin current output.
     54 ; SSE2-LABEL: zext_4i32_to_4i64:
     55 ; SSE2:       # BB#0: # %entry
     56 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
     57 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
     58 ; SSE2-NEXT:    pand %xmm3, %xmm2
     59 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
     60 ; SSE2-NEXT:    pand %xmm3, %xmm1
     61 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
     62 ; SSE2-NEXT:    retq
     63 ;
     64 ; SSSE3-LABEL: zext_4i32_to_4i64:
     65 ; SSSE3:       # BB#0: # %entry
     66 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
     67 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
     68 ; SSSE3-NEXT:    pand %xmm3, %xmm2
     69 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
     70 ; SSSE3-NEXT:    pand %xmm3, %xmm1
     71 ; SSSE3-NEXT:    movdqa %xmm2, %xmm0
     72 ; SSSE3-NEXT:    retq
     73 ;
     74 ; SSE41-LABEL: zext_4i32_to_4i64:
     75 ; SSE41:       # BB#0: # %entry
     76 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
     77 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
     78 ; SSE41-NEXT:    pand %xmm3, %xmm2
     79 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
     80 ; SSE41-NEXT:    pand %xmm3, %xmm1
     81 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
     82 ; SSE41-NEXT:    retq
     83 ;
     84 ; AVX1-LABEL: zext_4i32_to_4i64:
     85 ; AVX1:       # BB#0: # %entry
     86 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     87 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     88 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
     89 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     90 ; AVX1-NEXT:    retq
     91 ;
     92 ; AVX2-LABEL: zext_4i32_to_4i64:
     93 ; AVX2:       # BB#0: # %entry
     94 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     95 ; AVX2-NEXT:    retq
     96 entry:
     97   %B = zext <4 x i32> %A to <4 x i64>
     98   ret <4 x i64>%B
     99 }
    100 
    101 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
        ; NOTE(review): `zext <8 x i8> -> <8 x i32>`. The <8 x i8> argument arrives in
        ; 16-bit lanes, so after widening to 32-bit lanes a [255,255,255,255] mask clears
        ; the undefined high bits of each element; AVX1 masks the whole ymm once with
        ; vandps, AVX2 broadcasts the mask constant with vpbroadcastd.
    102 ; SSE2-LABEL: zext_8i8_to_8i32:
    103 ; SSE2:       # BB#0: # %entry
    104 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    105 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
    106 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
    107 ; SSE2-NEXT:    pand %xmm1, %xmm2
    108 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    109 ; SSE2-NEXT:    pand %xmm0, %xmm1
    110 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
    111 ; SSE2-NEXT:    retq
    112 ;
    113 ; SSSE3-LABEL: zext_8i8_to_8i32:
    114 ; SSSE3:       # BB#0: # %entry
    115 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
    116 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
    117 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
    118 ; SSSE3-NEXT:    pand %xmm1, %xmm2
    119 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    120 ; SSSE3-NEXT:    pand %xmm0, %xmm1
    121 ; SSSE3-NEXT:    movdqa %xmm2, %xmm0
    122 ; SSSE3-NEXT:    retq
    123 ;
    124 ; SSE41-LABEL: zext_8i8_to_8i32:
    125 ; SSE41:       # BB#0: # %entry
    126 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    127 ; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
    128 ; SSE41-NEXT:    pand %xmm1, %xmm2
    129 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    130 ; SSE41-NEXT:    pand %xmm0, %xmm1
    131 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
    132 ; SSE41-NEXT:    retq
    133 ;
    134 ; AVX1-LABEL: zext_8i8_to_8i32:
    135 ; AVX1:       # BB#0: # %entry
    136 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    137 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    138 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    139 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
    140 ; AVX1-NEXT:    retq
    141 ;
    142 ; AVX2-LABEL: zext_8i8_to_8i32:
    143 ; AVX2:       # BB#0: # %entry
    144 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    145 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
    146 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
    147 ; AVX2-NEXT:    retq
    148 entry:
    149   %t = zext <8 x i8> %z to <8 x i32>
    150   ret <8 x i32> %t
    151 }
    152 
    153 ; PR17654
    154 define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
        ; NOTE(review): `zext <16 x i8> -> <16 x i16>` (regression test for PR17654, see
        ; the comment above). Same low-half/high-half split as the i16->i32 case, using
        ; punpcklbw/punpckhbw byte interleaves pre-AVX2 and one vpmovzxbw ymm on AVX2.
    155 ; SSE2-LABEL: zext_16i8_to_16i16:
    156 ; SSE2:       # BB#0: # %entry
    157 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    158 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    159 ; SSE2-NEXT:    # kill
    160 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    161 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    162 ; SSE2-NEXT:    pand .LCPI3_0(%rip), %xmm1
    163 ; SSE2-NEXT:    retq
    164 ;
    165 ; SSSE3-LABEL: zext_16i8_to_16i16:
    166 ; SSSE3:       # BB#0: # %entry
    167 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    168 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    169 ; SSSE3-NEXT:    # kill
    170 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    171 ; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    172 ; SSSE3-NEXT:    pand .LCPI3_0(%rip), %xmm1
    173 ; SSSE3-NEXT:    retq
    174 ;
    175 ; SSE41-LABEL: zext_16i8_to_16i16:
    176 ; SSE41:       # BB#0: # %entry
    177 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    178 ; SSE41-NEXT:    pmovzxbw %xmm1, %xmm0 {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
    179 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    180 ; SSE41-NEXT:    pand .LCPI3_0(%rip), %xmm1
    181 ; SSE41-NEXT:    retq
    182 ;
    183 ; AVX1-LABEL: zext_16i8_to_16i16:
    184 ; AVX1:       # BB#0: # %entry
    185 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    186 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
    187 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    188 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    189 ; AVX1-NEXT:    retq
    190 ;
    191 ; AVX2-LABEL: zext_16i8_to_16i16:
    192 ; AVX2:       # BB#0: # %entry
    193 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    194 ; AVX2-NEXT:    retq
    195 entry:
    196   %t = zext <16 x i8> %z to <16 x i16>
    197   ret <16 x i16> %t
    198 }
    199 
    200 define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
        ; NOTE(review): zext of a loaded <16 x i8>. SSE4.1/AVX fold the loads straight
        ; into pmovzxbw (the CHECK patterns use mem operands, one per half); SSE2/SSSE3
        ; load once and unpack low/high against a zeroed register.
    201 ; SSE2-LABEL: load_zext_16i8_to_16i16:
    202 ; SSE2:        # BB#0: # %entry
    203 ; SSE2-NEXT:   movdqa (%rdi), %xmm1
    204 ; SSE2-NEXT:   pxor %xmm2, %xmm2
    205 ; SSE2-NEXT:   movdqa %xmm1, %xmm0
    206 ; SSE2-NEXT:   punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    207 ; SSE2-NEXT:   punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    208 ; SSE2-NEXT:   pand .LCPI4_0(%rip), %xmm1
    209 ; SSE2-NEXT:   retq
    210 ;
    211 ; SSSE3-LABEL: load_zext_16i8_to_16i16:
    212 ; SSSE3:        # BB#0: # %entry
    213 ; SSSE3-NEXT:   movdqa (%rdi), %xmm1
    214 ; SSSE3-NEXT:   pxor %xmm2, %xmm2
    215 ; SSSE3-NEXT:   movdqa %xmm1, %xmm0
    216 ; SSSE3-NEXT:   punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    217 ; SSSE3-NEXT:   punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
    218 ; SSSE3-NEXT:   pand .LCPI4_0(%rip), %xmm1
    219 ; SSSE3-NEXT:   retq
    220 ;
    221 ; SSE41-LABEL: load_zext_16i8_to_16i16:
    222 ; SSE41:       # BB#0: # %entry
    223 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    224 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    225 ; SSE41-NEXT:    retq
    226 ;
    227 ; AVX1-LABEL: load_zext_16i8_to_16i16:
    228 ; AVX1:       # BB#0: # %entry
    229 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    230 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    231 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    232 ; AVX1-NEXT:    retq
    233 ;
    234 ; AVX2-LABEL: load_zext_16i8_to_16i16:
    235 ; AVX2:       # BB#0: # %entry
    236 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    237 ; AVX2-NEXT:    retq
    238 entry:
    239  %X = load <16 x i8>, <16 x i8>* %ptr
    240  %Y = zext <16 x i8> %X to <16 x i16>
    241  ret <16 x i16> %Y
    242 }
    243 
    244 define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
        ; NOTE(review): zext of a loaded <8 x i16>; SSE4.1+ fold the load into
        ; pmovzxwd (mem operands), SSE2/SSSE3 load then unpack against zero.
    245 ; SSE2-LABEL: load_zext_8i16_to_8i32:
    246 ; SSE2:          # BB#0: # %entry
    247 ; SSE2-NEXT:   movdqa (%rdi), %xmm1
    248 ; SSE2-NEXT:   pxor %xmm2, %xmm2
    249 ; SSE2-NEXT:   movdqa %xmm1, %xmm0
    250 ; SSE2-NEXT:   punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    251 ; SSE2-NEXT:   punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
    252 ; SSE2-NEXT:   pand .LCPI5_0(%rip), %xmm1
    253 ; SSE2-NEXT:   retq
    254 ;
    255 ; SSSE3-LABEL: load_zext_8i16_to_8i32:
    256 ; SSSE3:        # BB#0: # %entry
    257 ; SSSE3-NEXT:   movdqa (%rdi), %xmm1
    258 ; SSSE3-NEXT:   pxor %xmm2, %xmm2
    259 ; SSSE3-NEXT:   movdqa %xmm1, %xmm0
    260 ; SSSE3-NEXT:   punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    261 ; SSSE3-NEXT:   punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
    262 ; SSSE3-NEXT:   pand .LCPI5_0(%rip), %xmm1
    263 ; SSSE3-NEXT:   retq
    264 ;
    265 ; SSE41-LABEL: load_zext_8i16_to_8i32:
    266 ; SSE41:       # BB#0: # %entry
    267 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    268 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    269 ; SSE41-NEXT:    retq
    270 ;
    271 ; AVX1-LABEL: load_zext_8i16_to_8i32:
    272 ; AVX1:       # BB#0: # %entry
    273 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    274 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    275 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    276 ; AVX1-NEXT:    retq
    277 ;
    278 ; AVX2-LABEL: load_zext_8i16_to_8i32:
    279 ; AVX2:       # BB#0: # %entry
    280 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    281 ; AVX2-NEXT:    retq
    282 entry:
    283  %X = load <8 x i16>, <8 x i16>* %ptr
    284  %Y = zext <8 x i16> %X to <8 x i32>
    285  ret <8 x i32>%Y
    286 }
    287 
    288 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
        ; NOTE(review): zext of a loaded <4 x i32>; SSE2/SSSE3 use pshufd + pand mask,
        ; SSE4.1+ fold the load into pmovzxdq (mem operands), AVX2 uses one wide form.
    289 ; SSE2-LABEL: load_zext_4i32_to_4i64:
    290 ; SSE2:       # BB#0: # %entry
    291 ; SSE2-NEXT:    movdqa (%rdi), %xmm1
    292 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
    293 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
    294 ; SSE2-NEXT:    pand %xmm2, %xmm0
    295 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
    296 ; SSE2-NEXT:    pand %xmm2, %xmm1
    297 ; SSE2-NEXT:    retq
    298 ;
    299 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
    300 ; SSSE3:       # BB#0: # %entry
    301 ; SSSE3-NEXT:    movdqa (%rdi), %xmm1
    302 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
    303 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
    304 ; SSSE3-NEXT:    pand %xmm2, %xmm0
    305 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
    306 ; SSSE3-NEXT:    pand %xmm2, %xmm1
    307 ; SSSE3-NEXT:    retq
    308 ;
    309 ; SSE41-LABEL: load_zext_4i32_to_4i64:
    310 ; SSE41:       # BB#0: # %entry
    311 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    312 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
    313 ; SSE41-NEXT:    retq
    314 ;
    315 ; AVX1-LABEL: load_zext_4i32_to_4i64:
    316 ; AVX1:       # BB#0: # %entry
    317 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
    318 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
    319 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    320 ; AVX1-NEXT:    retq
    321 ;
    322 ; AVX2-LABEL: load_zext_4i32_to_4i64:
    323 ; AVX2:       # BB#0: # %entry
    324 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    325 ; AVX2-NEXT:    retq
    326 entry:
    327  %X = load <4 x i32>, <4 x i32>* %ptr
    328  %Y = zext <4 x i32> %X to <4 x i64>
    329  ret <4 x i64>%Y
    330 }
    331 
    332 define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
        ; NOTE(review): a shufflevector interleaving %A with a zero vector, bitcast to
        ; <8 x i32> — expected to be recognized as a zero-extension and lowered the same
        ; way as zext_8i16_to_8i32 above (punpck vs pmovzxwd per subtarget).
    333 ; SSE2-LABEL: shuf_zext_8i16_to_8i32:
    334 ; SSE2:       # BB#0: # %entry
    335 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    336 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    337 ; SSE2-NEXT:    # kill
    338 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    339 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    340 ; SSE2-NEXT:    retq
    341 ;
    342 ; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
    343 ; SSSE3:       # BB#0: # %entry
    344 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    345 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    346 ; SSSE3-NEXT:    # kill
    347 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    348 ; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    349 ; SSSE3-NEXT:    retq
    350 ;
    351 ; SSE41-LABEL: shuf_zext_8i16_to_8i32:
    352 ; SSE41:       # BB#0: # %entry
    353 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    354 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    355 ; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    356 ; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
    357 ; SSE41-NEXT:    retq
    358 ;
    359 ; AVX1-LABEL: shuf_zext_8i16_to_8i32:
    360 ; AVX1:       # BB#0: # %entry
    361 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    362 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
    363 ; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    364 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    365 ; AVX1-NEXT:    retq
    366 ;
    367 ; AVX2-LABEL: shuf_zext_8i16_to_8i32:
    368 ; AVX2:       # BB#0: # %entry
    369 ; AVX2-NEXT:    # kill
    370 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    371 ; AVX2-NEXT:    retq
    372 entry:
    373   %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
    374   %Z = bitcast <16 x i16> %B to <8 x i32>
    375   ret <8 x i32> %Z
    376 }
    377 
    378 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
        ; NOTE(review): shuffle-with-zero bitcast to <4 x i64>, expected to match the
        ; zext lowering. The AVX1 path here is notably different from zext_4i32_to_4i64:
        ; vinsertps/vxorpd/vblendpd/vpermilps instead of vpunpckhdq — presumably the
        ; shuffle lowering path rather than the zext combine; worth confirming this is
        ; still the intended output on current llc.
    379 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
    380 ; SSE2:       # BB#0: # %entry
    381 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
    382 ; SSE2-NEXT:    pxor %xmm2, %xmm2
    383 ; SSE2-NEXT:    # kill
    384 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    385 ; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    386 ; SSE2-NEXT:    retq
    387 ;
    388 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
    389 ; SSSE3:       # BB#0: # %entry
    390 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    391 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    392 ; SSSE3-NEXT:    # kill
    393 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
    394 ; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    395 ; SSSE3-NEXT:    retq
    396 ;
    397 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
    398 ; SSE41:       # BB#0: # %entry
    399 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    400 ; SSE41-NEXT:    pxor %xmm2, %xmm2
    401 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
    402 ; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
    403 ; SSE41-NEXT:    retq
    404 ;
    405 ; AVX1-LABEL: shuf_zext_4i32_to_4i64:
    406 ; AVX1:       # BB#0: # %entry
    407 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
    408 ; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    409 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
    410 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
    411 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    412 ; AVX1-NEXT:    retq
    413 ;
    414 ; AVX2-LABEL: shuf_zext_4i32_to_4i64:
    415 ; AVX2:       # BB#0: # %entry
    416 ; AVX2-NEXT:    # kill
    417 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    418 ; AVX2-NEXT:    retq
    419 entry:
    420   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
    421   %Z = bitcast <8 x i32> %B to <4 x i64>
    422   ret <4 x i64> %Z
    423 }
    424 
    425 define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
        ; NOTE(review): byte-to-dword shuffle-with-zero, bitcast to <8 x i32>. SSE2 first
        ; packs the i8 elements out of their 16-bit lanes (pand + packuswb) before
        ; unpacking; SSSE3/SSE4.1/AVX use pshufb to gather the bytes and then pmovzxbd /
        ; zero-filling pshufb masks for each half; AVX2 uses one wide vpmovzxbd.
    426 ; SSE2-LABEL: shuf_zext_8i8_to_8i32:
    427 ; SSE2:       # BB#0: # %entry
    428 ; SSE2-NEXT:    pand .LCPI9_0(%rip), %xmm0
    429 ; SSE2-NEXT:    packuswb %xmm0, %xmm0
    430 ; SSE2-NEXT:    pxor %xmm1, %xmm1
    431 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
    432 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
    433 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
    434 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
    435 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
    436 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
    437 ; SSE2-NEXT:    pandn %xmm0, %xmm1
    438 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
    439 ; SSE2-NEXT:    retq
    440 ;
    441 ; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
    442 ; SSSE3:       # BB#0: # %entry
    443 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
    444 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    445 ; SSSE3-NEXT:    pxor %xmm2, %xmm2
    446 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
    447 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
    448 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
    449 ; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
    450 ; SSSE3-NEXT:    retq
    451 ;
    452 ; SSE41-LABEL: shuf_zext_8i8_to_8i32:
    453 ; SSE41:       # BB#0: # %entry
    454 ; SSE41-NEXT:    movdqa %xmm0, %xmm1
    455 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    456 ; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
    457 ; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
    458 ; SSE41-NEXT:    retq
    459 ;
    460 ; AVX1-LABEL: shuf_zext_8i8_to_8i32:
    461 ; AVX1:       # BB#0: # %entry
    462 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    463 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    464 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    465 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    466 ; AVX1-NEXT:    retq
    467 ;
    468 ; AVX2-LABEL: shuf_zext_8i8_to_8i32:
    469 ; AVX2:       # BB#0: # %entry
    470 ; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
    471 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    472 ; AVX2-NEXT:    retq
    473 entry:
    474   %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
    475   %Z = bitcast <32 x i8> %B to <8 x i32>
    476   ret <8 x i32> %Z
    477 }
    478