Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
      4 
      5 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
      6 ; KNL-LABEL: zext_8x8mem_to_8x16:
      7 ; KNL:       ## BB#0:
      8 ; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
      9 ; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
     10 ; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
     11 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
     12 ; KNL-NEXT:    retq
     13 ;
     14 ; SKX-LABEL: zext_8x8mem_to_8x16:
     15 ; SKX:       ## BB#0:
     16 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
     17 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
     18 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
     19 ; SKX-NEXT:    retq
     20   %bytes = load <8 x i8>, <8 x i8>* %i, align 1 ; load the 8 source bytes with 1-byte alignment
     21   %wide = zext <8 x i8> %bytes to <8 x i16> ; zero-extend every i8 lane to i16
     22   %masked = select <8 x i1> %mask, <8 x i16> %wide, <8 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
     23   ret <8 x i16> %masked
     24 }
     25 
     26 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
     27 ; KNL-LABEL: sext_8x8mem_to_8x16:
     28 ; KNL:       ## BB#0:
     29 ; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
     30 ; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
     31 ; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
     32 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
     33 ; KNL-NEXT:    retq
     34 ;
     35 ; SKX-LABEL: sext_8x8mem_to_8x16:
     36 ; SKX:       ## BB#0:
     37 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
     38 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
     39 ; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
     40 ; SKX-NEXT:    retq
     41   %bytes = load <8 x i8>, <8 x i8>* %i, align 1 ; load the 8 source bytes with 1-byte alignment
     42   %wide = sext <8 x i8> %bytes to <8 x i16> ; sign-extend every i8 lane to i16
     43   %masked = select <8 x i1> %mask, <8 x i16> %wide, <8 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
     44   ret <8 x i16> %masked
     45 }
     46 
     47 
     48 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
     49 ; KNL-LABEL: zext_16x8mem_to_16x16:
     50 ; KNL:       ## BB#0:
     51 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     52 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
     53 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
     54 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
     55 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
     56 ; KNL-NEXT:    retq
     57 ;
     58 ; SKX-LABEL: zext_16x8mem_to_16x16:
     59 ; SKX:       ## BB#0:
     60 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
     61 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
     62 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
     63 ; SKX-NEXT:    retq
     64   %bytes = load <16 x i8>, <16 x i8>* %i, align 1 ; load the 16 source bytes with 1-byte alignment
     65   %wide = zext <16 x i8> %bytes to <16 x i16> ; zero-extend every i8 lane to i16
     66   %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
     67   ret <16 x i16> %masked
     68 }
     69 
     70 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
     71 ; KNL-LABEL: sext_16x8mem_to_16x16:
     72 ; KNL:       ## BB#0:
     73 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     74 ; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
     75 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
     76 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
     77 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
     78 ; KNL-NEXT:    retq
     79 ;
     80 ; SKX-LABEL: sext_16x8mem_to_16x16:
     81 ; SKX:       ## BB#0:
     82 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
     83 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
     84 ; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
     85 ; SKX-NEXT:    retq
     86   %bytes = load <16 x i8>, <16 x i8>* %i, align 1 ; load the 16 source bytes with 1-byte alignment
     87   %wide = sext <16 x i8> %bytes to <16 x i16> ; sign-extend every i8 lane to i16
     88   %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
     89   ret <16 x i16> %masked
     90 }
     91 
     92 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { ; unmasked register-to-register zero extension
     93 ; ALL-LABEL: zext_16x8_to_16x16:
     94 ; ALL:       ## BB#0:
     95 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     96 ; ALL-NEXT:    retq
     97   %wide = zext <16 x i8> %a to <16 x i16> ; zero-extend every i8 lane to i16
     98   ret <16 x i16> %wide
     99 }
    100 
    101 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; zero extension of a register operand under a zeroing mask
    102 ; KNL-LABEL: zext_16x8_to_16x16_mask:
    103 ; KNL:       ## BB#0:
    104 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    105 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    106 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    107 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    108 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    109 ; KNL-NEXT:    retq
    110 ;
    111 ; SKX-LABEL: zext_16x8_to_16x16_mask:
    112 ; SKX:       ## BB#0:
    113 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    114 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    115 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    116 ; SKX-NEXT:    retq
    117   %wide = zext <16 x i8> %a to <16 x i16> ; zero-extend every i8 lane to i16
    118   %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    119   ret <16 x i16> %masked
    120 }
    121 
    122 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { ; unmasked register-to-register sign extension
    123 ; ALL-LABEL: sext_16x8_to_16x16:
    124 ; ALL:       ## BB#0:
    125 ; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
    126 ; ALL-NEXT:    retq
    127   %wide = sext <16 x i8> %a to <16 x i16> ; sign-extend every i8 lane to i16
    128   ret <16 x i16> %wide
    129 }
    130 
    131 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; sign extension of a register operand under a zeroing mask
    132 ; KNL-LABEL: sext_16x8_to_16x16_mask:
    133 ; KNL:       ## BB#0:
    134 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    135 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
    136 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    137 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    138 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    139 ; KNL-NEXT:    retq
    140 ;
    141 ; SKX-LABEL: sext_16x8_to_16x16_mask:
    142 ; SKX:       ## BB#0:
    143 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    144 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    145 ; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
    146 ; SKX-NEXT:    retq
    147   %wide = sext <16 x i8> %a to <16 x i16> ; sign-extend every i8 lane to i16
    148   %masked = select <16 x i1> %mask, <16 x i16> %wide, <16 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    149   ret <16 x i16> %masked
    150 }
    151 
    152 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8>* %i, <32 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    153 ; KNL-LABEL: zext_32x8mem_to_32x16:
    154 ; KNL:       ## BB#0:
    155 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    156 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    157 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    158 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
    159 ; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
    160 ; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
    161 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    162 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    163 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
    164 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
    165 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
    166 ; KNL-NEXT:    vmovaps %zmm2, %zmm0
    167 ; KNL-NEXT:    retq
    168 ;
    169 ; SKX-LABEL: zext_32x8mem_to_32x16:
    170 ; SKX:       ## BB#0:
    171 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
    172 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
    173 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
    174 ; SKX-NEXT:    retq
    175   %bytes = load <32 x i8>, <32 x i8>* %i, align 1 ; load the 32 source bytes with 1-byte alignment
    176   %wide = zext <32 x i8> %bytes to <32 x i16> ; zero-extend every i8 lane to i16
    177   %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    178   ret <32 x i16> %masked
    179 }
    180 
    181 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8>* %i, <32 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    182 ; KNL-LABEL: sext_32x8mem_to_32x16:
    183 ; KNL:       ## BB#0:
    184 ; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm1
    185 ; KNL-NEXT:    vpmovsxbw (%rdi), %ymm2
    186 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    187 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
    188 ; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
    189 ; KNL-NEXT:    vpand %ymm2, %ymm3, %ymm2
    190 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    191 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    192 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
    193 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
    194 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm1
    195 ; KNL-NEXT:    vmovaps %zmm2, %zmm0
    196 ; KNL-NEXT:    retq
    197 ;
    198 ; SKX-LABEL: sext_32x8mem_to_32x16:
    199 ; SKX:       ## BB#0:
    200 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
    201 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
    202 ; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
    203 ; SKX-NEXT:    retq
    204   %bytes = load <32 x i8>, <32 x i8>* %i, align 1 ; load the 32 source bytes with 1-byte alignment
    205   %wide = sext <32 x i8> %bytes to <32 x i16> ; sign-extend every i8 lane to i16
    206   %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    207   ret <32 x i16> %masked
    208 }
    209 
    210 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a) nounwind readnone { ; unmasked register-to-register zero extension
    211 ; KNL-LABEL: zext_32x8_to_32x16:
    212 ; KNL:       ## BB#0:
    213 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    214 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    215 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    216 ; KNL-NEXT:    vmovaps %zmm2, %zmm0
    217 ; KNL-NEXT:    retq
    218 ;
    219 ; SKX-LABEL: zext_32x8_to_32x16:
    220 ; SKX:       ## BB#0:
    221 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    222 ; SKX-NEXT:    retq
    223   %wide = zext <32 x i8> %a to <32 x i16> ; zero-extend every i8 lane to i16
    224   ret <32 x i16> %wide
    225 }
    226 
    227 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a, <32 x i1> %mask) nounwind readnone { ; zero extension of a register operand under a zeroing mask
    228 ; KNL-LABEL: zext_32x8_to_32x16_mask:
    229 ; KNL:       ## BB#0:
    230 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
    231 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    232 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    233 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    234 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
    235 ; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
    236 ; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
    237 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
    238 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    239 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    240 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    241 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
    242 ; KNL-NEXT:    retq
    243 ;
    244 ; SKX-LABEL: zext_32x8_to_32x16_mask:
    245 ; SKX:       ## BB#0:
    246 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    247 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    248 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    249 ; SKX-NEXT:    retq
    250   %wide = zext <32 x i8> %a to <32 x i16> ; zero-extend every i8 lane to i16
    251   %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    252   ret <32 x i16> %masked
    253 }
    254 
    255 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a) nounwind readnone { ; unmasked register-to-register sign extension
    256 ; KNL-LABEL: sext_32x8_to_32x16:
    257 ; KNL:       ## BB#0:
    258 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm2
    259 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    260 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
    261 ; KNL-NEXT:    vmovaps %zmm2, %zmm0
    262 ; KNL-NEXT:    retq
    263 ;
    264 ; SKX-LABEL: sext_32x8_to_32x16:
    265 ; SKX:       ## BB#0:
    266 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
    267 ; SKX-NEXT:    retq
    268   %wide = sext <32 x i8> %a to <32 x i16> ; sign-extend every i8 lane to i16
    269   ret <32 x i16> %wide
    270 }
    271 
    272 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a, <32 x i1> %mask) nounwind readnone { ; sign extension of a register operand under a zeroing mask
    273 ; KNL-LABEL: sext_32x8_to_32x16_mask:
    274 ; KNL:       ## BB#0:
    275 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm2
    276 ; KNL-NEXT:    vpmovsxbw %xmm2, %ymm2
    277 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
    278 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    279 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
    280 ; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
    281 ; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
    282 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm1
    283 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    284 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    285 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    286 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
    287 ; KNL-NEXT:    retq
    288 ;
    289 ; SKX-LABEL: sext_32x8_to_32x16_mask:
    290 ; SKX:       ## BB#0:
    291 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    292 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    293 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
    294 ; SKX-NEXT:    retq
    295   %wide = sext <32 x i8> %a to <32 x i16> ; sign-extend every i8 lane to i16
    296   %masked = select <32 x i1> %mask, <32 x i16> %wide, <32 x i16> zeroinitializer ; lanes whose mask bit is clear become zero
    297   ret <32 x i16> %masked
    298 }
    299 
    300 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    301 ; KNL-LABEL: zext_4x8mem_to_4x32:
    302 ; KNL:       ## BB#0:
    303 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    304 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    305 ; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    306 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    307 ; KNL-NEXT:    retq
    308 ;
    309 ; SKX-LABEL: zext_4x8mem_to_4x32:
    310 ; SKX:       ## BB#0:
    311 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    312 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    313 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    314 ; SKX-NEXT:    retq
    315   %bytes = load <4 x i8>, <4 x i8>* %i, align 1 ; load the 4 source bytes with 1-byte alignment
    316   %wide = zext <4 x i8> %bytes to <4 x i32> ; zero-extend every i8 lane to i32
    317   %masked = select <4 x i1> %mask, <4 x i32> %wide, <4 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    318   ret <4 x i32> %masked
    319 }
    320 
    321 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    322 ; KNL-LABEL: sext_4x8mem_to_4x32:
    323 ; KNL:       ## BB#0:
    324 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    325 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    326 ; KNL-NEXT:    vpmovsxbd (%rdi), %xmm1
    327 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    328 ; KNL-NEXT:    retq
    329 ;
    330 ; SKX-LABEL: sext_4x8mem_to_4x32:
    331 ; SKX:       ## BB#0:
    332 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    333 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    334 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
    335 ; SKX-NEXT:    retq
    336   %bytes = load <4 x i8>, <4 x i8>* %i, align 1 ; load the 4 source bytes with 1-byte alignment
    337   %wide = sext <4 x i8> %bytes to <4 x i32> ; sign-extend every i8 lane to i32
    338   %masked = select <4 x i1> %mask, <4 x i32> %wide, <4 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    339   ret <4 x i32> %masked
    340 }
    341 
    342 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    343 ; KNL-LABEL: zext_8x8mem_to_8x32:
    344 ; KNL:       ## BB#0:
    345 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    346 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    347 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    348 ; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    349 ; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    350 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    351 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    352 ; KNL-NEXT:    retq
    353 ;
    354 ; SKX-LABEL: zext_8x8mem_to_8x32:
    355 ; SKX:       ## BB#0:
    356 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    357 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    358 ; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    359 ; SKX-NEXT:    retq
    360   %bytes = load <8 x i8>, <8 x i8>* %i, align 1 ; load the 8 source bytes with 1-byte alignment
    361   %wide = zext <8 x i8> %bytes to <8 x i32> ; zero-extend every i8 lane to i32
    362   %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    363   ret <8 x i32> %masked
    364 }
    365 
    366 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8>* %i, <8 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    367 ; KNL-LABEL: sext_8x8mem_to_8x32:
    368 ; KNL:       ## BB#0:
    369 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    370 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    371 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    372 ; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
    373 ; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    374 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    375 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    376 ; KNL-NEXT:    retq
    377 ;
    378 ; SKX-LABEL: sext_8x8mem_to_8x32:
    379 ; SKX:       ## BB#0:
    380 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    381 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    382 ; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
    383 ; SKX-NEXT:    retq
    384   %bytes = load <8 x i8>, <8 x i8>* %i, align 1 ; load the 8 source bytes with 1-byte alignment
    385   %wide = sext <8 x i8> %bytes to <8 x i32> ; sign-extend every i8 lane to i32
    386   %masked = select <8 x i1> %mask, <8 x i32> %wide, <8 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    387   ret <8 x i32> %masked
    388 }
    389 
    390 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    391 ; KNL-LABEL: zext_16x8mem_to_16x32:
    392 ; KNL:       ## BB#0:
    393 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    394 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    395 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    396 ; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
    397 ; KNL-NEXT:    retq
    398 ;
    399 ; SKX-LABEL: zext_16x8mem_to_16x32:
    400 ; SKX:       ## BB#0:
    401 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    402 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    403 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
    404 ; SKX-NEXT:    retq
    405   %bytes = load <16 x i8>, <16 x i8>* %i, align 1 ; load the 16 source bytes with 1-byte alignment
    406   %wide = zext <16 x i8> %bytes to <16 x i32> ; zero-extend every i8 lane to i32
    407   %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    408   ret <16 x i32> %masked
    409 }
    410 
    411 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8>* %i, <16 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    412 ; KNL-LABEL: sext_16x8mem_to_16x32:
    413 ; KNL:       ## BB#0:
    414 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    415 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    416 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    417 ; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
    418 ; KNL-NEXT:    retq
    419 ;
    420 ; SKX-LABEL: sext_16x8mem_to_16x32:
    421 ; SKX:       ## BB#0:
    422 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    423 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    424 ; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
    425 ; SKX-NEXT:    retq
    426   %bytes = load <16 x i8>, <16 x i8>* %i, align 1 ; load the 16 source bytes with 1-byte alignment
    427   %wide = sext <16 x i8> %bytes to <16 x i32> ; sign-extend every i8 lane to i32
    428   %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    429   ret <16 x i32> %masked
    430 }
    431 
    432 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; zero extension of a register operand under a zeroing mask
    433 ; KNL-LABEL: zext_16x8_to_16x32_mask:
    434 ; KNL:       ## BB#0:
    435 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    436 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    437 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    438 ; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    439 ; KNL-NEXT:    retq
    440 ;
    441 ; SKX-LABEL: zext_16x8_to_16x32_mask:
    442 ; SKX:       ## BB#0:
    443 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    444 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    445 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    446 ; SKX-NEXT:    retq
    447   %wide = zext <16 x i8> %a to <16 x i32> ; zero-extend every i8 lane to i32
    448   %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    449   ret <16 x i32> %masked
    450 }
    451 
    452 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; sign extension of a register operand under a zeroing mask
    453 ; KNL-LABEL: sext_16x8_to_16x32_mask:
    454 ; KNL:       ## BB#0:
    455 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    456 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    457 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    458 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
    459 ; KNL-NEXT:    retq
    460 ;
    461 ; SKX-LABEL: sext_16x8_to_16x32_mask:
    462 ; SKX:       ## BB#0:
    463 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    464 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    465 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
    466 ; SKX-NEXT:    retq
    467   %wide = sext <16 x i8> %a to <16 x i32> ; sign-extend every i8 lane to i32
    468   %masked = select <16 x i1> %mask, <16 x i32> %wide, <16 x i32> zeroinitializer ; lanes whose mask bit is clear become zero
    469   ret <16 x i32> %masked
    470 }
    471 
    472 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; unmasked register-to-register zero extension
    473 ; ALL-LABEL: zext_16x8_to_16x32:
    474 ; ALL:       ## BB#0:
    475 ; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    476 ; ALL-NEXT:    retq
    477   %wide = zext <16 x i8> %i to <16 x i32> ; zero-extend every i8 lane to i32
    478   ret <16 x i32> %wide
    479 }
    480 
    481 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; unmasked register-to-register sign extension
    482 ; ALL-LABEL: sext_16x8_to_16x32:
    483 ; ALL:       ## BB#0:
    484 ; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
    485 ; ALL-NEXT:    retq
    486   %wide = sext <16 x i8> %i to <16 x i32> ; sign-extend every i8 lane to i32
    487   ret <16 x i32> %wide
    488 }
    489 
    490 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8>* %i, <2 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    491 ; KNL-LABEL: zext_2x8mem_to_2x64:
    492 ; KNL:       ## BB#0:
    493 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    494 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    495 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    496 ; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    497 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    498 ; KNL-NEXT:    retq
    499 ;
    500 ; SKX-LABEL: zext_2x8mem_to_2x64:
    501 ; SKX:       ## BB#0:
    502 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    503 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
    504 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    505 ; SKX-NEXT:    retq
    506   %bytes = load <2 x i8>, <2 x i8>* %i, align 1 ; load the 2 source bytes with 1-byte alignment
    507   %wide = zext <2 x i8> %bytes to <2 x i64> ; zero-extend every i8 lane to i64
    508   %masked = select <2 x i1> %mask, <2 x i64> %wide, <2 x i64> zeroinitializer ; lanes whose mask bit is clear become zero
    509   ret <2 x i64> %masked
    510 }
    511 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8>* %i, <2 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    512 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
    513 ; KNL:       ## BB#0:
    514 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    515 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    516 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    517 ; KNL-NEXT:    vpmovsxbq (%rdi), %xmm1
    518 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    519 ; KNL-NEXT:    retq
    520 ;
    521 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
    522 ; SKX:       ## BB#0:
    523 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    524 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
    525 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
    526 ; SKX-NEXT:    retq
    527   %bytes = load <2 x i8>, <2 x i8>* %i, align 1 ; load the 2 source bytes with 1-byte alignment
    528   %wide = sext <2 x i8> %bytes to <2 x i64> ; sign-extend every i8 lane to i64
    529   %masked = select <2 x i1> %mask, <2 x i64> %wide, <2 x i64> zeroinitializer ; lanes whose mask bit is clear become zero
    530   ret <2 x i64> %masked
    531 }
    532 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8>* %i) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    533 ; ALL-LABEL: sext_2x8mem_to_2x64:
    534 ; ALL:       ## BB#0:
    535 ; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
    536 ; ALL-NEXT:    retq
    537   %bytes = load <2 x i8>, <2 x i8>* %i, align 1 ; load the 2 source bytes with 1-byte alignment
    538   %wide = sext <2 x i8> %bytes to <2 x i64> ; sign-extend every i8 lane to i64, no mask applied
    539   ret <2 x i64> %wide
    540 }
    541 
    542 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8>* %i, <4 x i1> %mask) nounwind readonly { ; readonly, not readnone, because the body loads through %i
    543 ; KNL-LABEL: zext_4x8mem_to_4x64:
    544 ; KNL:       ## BB#0:
    545 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    546 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    547 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
    548 ; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    549 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
    550 ; KNL-NEXT:    retq
    551 ;
    552 ; SKX-LABEL: zext_4x8mem_to_4x64:
    553 ; SKX:       ## BB#0:
    554 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    555 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    556 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    557 ; SKX-NEXT:    retq
    558   %bytes = load <4 x i8>, <4 x i8>* %i, align 1 ; load the 4 source bytes with 1-byte alignment
    559   %wide = zext <4 x i8> %bytes to <4 x i64> ; zero-extend every i8 lane to i64
    560   %masked = select <4 x i1> %mask, <4 x i64> %wide, <4 x i64> zeroinitializer ; lanes whose mask bit is clear become zero
    561   ret <4 x i64> %masked
    562 }
    563 
    564 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <4 x i8> is loaded from %i (align 1), sign-extended
        ; to <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxbq into ymm0 under %k1; the knl run
        ; expects the mask sign-extended to ymm0 and combined with the result via vpand.
    565 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
    566 ; KNL:       ## BB#0:
    567 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    568 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    569 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
    570 ; KNL-NEXT:    vpmovsxbq (%rdi), %ymm1
    571 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
    572 ; KNL-NEXT:    retq
    573 ;
    574 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
    575 ; SKX:       ## BB#0:
    576 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    577 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    578 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
    579 ; SKX-NEXT:    retq
    580   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    581   %x   = sext <4 x i8> %a to <4 x i64>
    582   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    583   ret <4 x i64> %ret
    584 }
    585 
    586 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <4 x i8> (align 1) to <4 x i64>.
        ; Both runs share one set of checks: a single vpmovsxbq from (%rdi) into ymm0.
    587 ; ALL-LABEL: sext_4x8mem_to_4x64:
    588 ; ALL:       ## BB#0:
    589 ; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
    590 ; ALL-NEXT:    retq
    591   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    592   %x   = sext <4 x i8> %a to <4 x i64>
    593   ret <4 x i64> %x
    594 }
    595 
    596 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <8 x i8> is loaded from %i (align 1), zero-extended
        ; to <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovzxbq into zmm0 under %k1; they differ only
        ; in how %k1 is produced (vpmovsxwq/vpsllq/vptestmq on knl, vpsllw/vpmovw2m on skx).
    597 ; KNL-LABEL: zext_8x8mem_to_8x64:
    598 ; KNL:       ## BB#0:
    599 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    600 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    601 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    602 ; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
    603 ; KNL-NEXT:    retq
    604 ;
    605 ; SKX-LABEL: zext_8x8mem_to_8x64:
    606 ; SKX:       ## BB#0:
    607 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    608 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    609 ; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
    610 ; SKX-NEXT:    retq
    611   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    612   %x   = zext <8 x i8> %a to <8 x i64>
    613   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
    614   ret <8 x i64> %ret
    615 }
    616 
    617 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <8 x i8> is loaded from %i (align 1), sign-extended
        ; to <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovsxbq from (%rdi) into zmm0 under %k1; they
        ; differ only in how %k1 is produced from the incoming xmm0 mask.
    618 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
    619 ; KNL:       ## BB#0:
    620 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    621 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    622 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    623 ; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
    624 ; KNL-NEXT:    retq
    625 ;
    626 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
    627 ; SKX:       ## BB#0:
    628 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    629 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    630 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
    631 ; SKX-NEXT:    retq
    632   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    633   %x   = sext <8 x i8> %a to <8 x i64>
    634   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
    635   ret <8 x i64> %ret
    636 }
    637 
    638 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <8 x i8> (align 1) to <8 x i64>.
        ; Both runs share one set of checks: a single vpmovsxbq from (%rdi) into zmm0.
    639 ; ALL-LABEL: sext_8x8mem_to_8x64:
    640 ; ALL:       ## BB#0:
    641 ; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
    642 ; ALL-NEXT:    retq
    643   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    644   %x   = sext <8 x i8> %a to <8 x i64>
    645   ret <8 x i64> %x
    646 }
    647 
    648 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <4 x i16> is loaded from %i (align 1), zero-extended
        ; to <4 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxwd into xmm0 under %k1; the knl run
        ; expects a vector mask (vpslld/vpsrad) combined with the result via vpand.
    649 ; KNL-LABEL: zext_4x16mem_to_4x32:
    650 ; KNL:       ## BB#0:
    651 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    652 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    653 ; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    654 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    655 ; KNL-NEXT:    retq
    656 ;
    657 ; SKX-LABEL: zext_4x16mem_to_4x32:
    658 ; SKX:       ## BB#0:
    659 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    660 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    661 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    662 ; SKX-NEXT:    retq
    663   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    664   %x   = zext <4 x i16> %a to <4 x i32>
    665   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    666   ret <4 x i32> %ret
    667 }
    668 
    669 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <4 x i16> is loaded from %i (align 1), sign-extended
        ; to <4 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxwd into xmm0 under %k1; the knl run
        ; expects a vector mask (vpslld/vpsrad) combined with the result via vpand.
    670 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
    671 ; KNL:       ## BB#0:
    672 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    673 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    674 ; KNL-NEXT:    vpmovsxwd (%rdi), %xmm1
    675 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    676 ; KNL-NEXT:    retq
    677 ;
    678 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
    679 ; SKX:       ## BB#0:
    680 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    681 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    682 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
    683 ; SKX-NEXT:    retq
    684   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    685   %x   = sext <4 x i16> %a to <4 x i32>
    686   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    687   ret <4 x i32> %ret
    688 }
    689 
    690 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <4 x i16> (align 1) to <4 x i32>.
        ; Both runs share one set of checks: a single vpmovsxwd from (%rdi) into xmm0.
    691 ; ALL-LABEL: sext_4x16mem_to_4x32:
    692 ; ALL:       ## BB#0:
    693 ; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
    694 ; ALL-NEXT:    retq
    695   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    696   %x   = sext <4 x i16> %a to <4 x i32>
    697   ret <4 x i32> %x
    698 }
    699 
    700 
    701 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <8 x i16> is loaded from %i (align 1), zero-extended
        ; to <8 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxwd into ymm0 under %k1; the knl run
        ; expects an unmasked vpmovzxwd followed by a zmm-wide vpblendmd against a
        ; zeroed register under %k1.
    702 ; KNL-LABEL: zext_8x16mem_to_8x32:
    703 ; KNL:       ## BB#0:
    704 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    705 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    706 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    707 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    708 ; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    709 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    710 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    711 ; KNL-NEXT:    retq
    712 ;
    713 ; SKX-LABEL: zext_8x16mem_to_8x32:
    714 ; SKX:       ## BB#0:
    715 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    716 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    717 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    718 ; SKX-NEXT:    retq
    719   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    720   %x   = zext <8 x i16> %a to <8 x i32>
    721   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    722   ret <8 x i32> %ret
    723 }
    724 
    725 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <8 x i16> is loaded from %i (align 1), sign-extended
        ; to <8 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxwd into ymm0 under %k1; the knl run
        ; expects an unmasked vpmovsxwd followed by a zmm-wide vpblendmd against a
        ; zeroed register under %k1.
    726 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
    727 ; KNL:       ## BB#0:
    728 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    729 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    730 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    731 ; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
    732 ; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    733 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    734 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    735 ; KNL-NEXT:    retq
    736 ;
    737 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
    738 ; SKX:       ## BB#0:
    739 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    740 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    741 ; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
    742 ; SKX-NEXT:    retq
    743   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    744   %x   = sext <8 x i16> %a to <8 x i32>
    745   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    746   ret <8 x i32> %ret
    747 }
    748 
    749 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <8 x i16> (align 1) to <8 x i32>.
        ; Both runs share one set of checks: a single vpmovsxwd from (%rdi) into ymm0.
    750 ; ALL-LABEL: sext_8x16mem_to_8x32:
    751 ; ALL:       ## BB#0:
    752 ; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
    753 ; ALL-NEXT:    retq
    754   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    755   %x   = sext <8 x i16> %a to <8 x i32>
    756   ret <8 x i32> %x
    757 }
    758 
    759 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extend of a register operand: %a (<8 x i16>) is zero-extended to
        ; <8 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The checks take the data from xmm0 and the mask from xmm1. The skx run expects
        ; a zero-masked vpmovzxwd under %k1; the knl run expects an unmasked vpmovzxwd
        ; followed by a zmm-wide vpblendmd against a zeroed register under %k1.
    760 ; KNL-LABEL: zext_8x16_to_8x32mask:
    761 ; KNL:       ## BB#0:
    762 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
    763 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
    764 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
    765 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    766 ; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
    767 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    768 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    769 ; KNL-NEXT:    retq
    770 ;
    771 ; SKX-LABEL: zext_8x16_to_8x32mask:
    772 ; SKX:       ## BB#0:
    773 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
    774 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
    775 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    776 ; SKX-NEXT:    retq
    777   %x   = zext <8 x i16> %a to <8 x i32>
    778   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    779   ret <8 x i32> %ret
    780 }
    781 
    782 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
        ; Unmasked zero-extend of a register operand from <8 x i16> to <8 x i32>.
        ; Both runs share one set of checks: a single vpmovzxwd from xmm0 into ymm0.
    783 ; ALL-LABEL: zext_8x16_to_8x32:
    784 ; ALL:       ## BB#0:
    785 ; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    786 ; ALL-NEXT:    retq
    787   %x   = zext <8 x i16> %a to <8 x i32>
    788   ret <8 x i32> %x
    789 }
    790 
    791 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <16 x i16> is loaded from %i (align 1), zero-extended
        ; to <16 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovzxwd into zmm0 under %k1; they differ only
        ; in how %k1 is produced (vpmovsxbd/vpslld/vptestmd on knl, vpsllw/vpmovb2m on skx).
    792 ; KNL-LABEL: zext_16x16mem_to_16x32:
    793 ; KNL:       ## BB#0:
    794 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    795 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    796 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    797 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    798 ; KNL-NEXT:    retq
    799 ;
    800 ; SKX-LABEL: zext_16x16mem_to_16x32:
    801 ; SKX:       ## BB#0:
    802 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    803 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    804 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    805 ; SKX-NEXT:    retq
    806   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    807   %x   = zext <16 x i16> %a to <16 x i32>
    808   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    809   ret <16 x i32> %ret
    810 }
    811 
    812 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <16 x i16> is loaded from %i (align 1), sign-extended
        ; to <16 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovsxwd from (%rdi) into zmm0 under %k1; they
        ; differ only in how %k1 is produced from the incoming xmm0 mask.
    813 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
    814 ; KNL:       ## BB#0:
    815 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    816 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    817 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    818 ; KNL-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
    819 ; KNL-NEXT:    retq
    820 ;
    821 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
    822 ; SKX:       ## BB#0:
    823 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    824 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    825 ; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
    826 ; SKX-NEXT:    retq
    827   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    828   %x   = sext <16 x i16> %a to <16 x i32>
    829   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    830   ret <16 x i32> %ret
    831 }
    832 
    833 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <16 x i16> (align 1) to <16 x i32>.
        ; Both runs share one set of checks: a single vpmovsxwd from (%rdi) into zmm0.
    834 ; ALL-LABEL: sext_16x16mem_to_16x32:
    835 ; ALL:       ## BB#0:
    836 ; ALL-NEXT:    vpmovsxwd (%rdi), %zmm0
    837 ; ALL-NEXT:    retq
    838   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    839   %x   = sext <16 x i16> %a to <16 x i32>
    840   ret <16 x i32> %x
    841 }
    842 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
        ; Masked zero-extend of a register operand: %a (<16 x i16>) is zero-extended to
        ; <16 x i32>, and lanes whose %mask bit is false are replaced by zero.
        ; The checks take the data from ymm0 and the mask from xmm1. Both runs expect a
        ; zero-masked vpmovzxwd into zmm0 under %k1 and differ only in how %k1 is produced.
    843 ; KNL-LABEL: zext_16x16_to_16x32mask:
    844 ; KNL:       ## BB#0:
    845 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    846 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    847 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    848 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    849 ; KNL-NEXT:    retq
    850 ;
    851 ; SKX-LABEL: zext_16x16_to_16x32mask:
    852 ; SKX:       ## BB#0:
    853 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    854 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    855 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    856 ; SKX-NEXT:    retq
    857   %x   = zext <16 x i16> %a to <16 x i32>
    858   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    859   ret <16 x i32> %ret
    860 }
    861 
    862 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
        ; Unmasked zero-extend of a register operand from <16 x i16> to <16 x i32>.
        ; Both runs share one set of checks: a single vpmovzxwd from ymm0 into zmm0.
    863 ; ALL-LABEL: zext_16x16_to_16x32:
    864 ; ALL:       ## BB#0:
    865 ; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    866 ; ALL-NEXT:    retq
    867   %x   = zext <16 x i16> %a to <16 x i32>
    868   ret <16 x i32> %x
    869 }
    870 
    871 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <2 x i16> is loaded from %i (align 1), zero-extended
        ; to <2 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxwq into xmm0 under %k1; the knl run
        ; expects a vector mask built in xmm0 and applied with vpand.
    872 ; KNL-LABEL: zext_2x16mem_to_2x64:
    873 ; KNL:       ## BB#0:
    874 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    875 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    876 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    877 ; KNL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    878 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    879 ; KNL-NEXT:    retq
    880 ;
    881 ; SKX-LABEL: zext_2x16mem_to_2x64:
    882 ; SKX:       ## BB#0:
    883 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    884 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
    885 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    886 ; SKX-NEXT:    retq
    887   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    888   %x   = zext <2 x i16> %a to <2 x i64>
    889   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    890   ret <2 x i64> %ret
    891 }
    892 
    893 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <2 x i16> is loaded from %i (align 1), sign-extended
        ; to <2 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxwq into xmm0 under %k1; the knl run
        ; expects a vector mask built in xmm0 and applied with vpand.
    894 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
    895 ; KNL:       ## BB#0:
    896 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    897 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    898 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
    899 ; KNL-NEXT:    vpmovsxwq (%rdi), %xmm1
    900 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
    901 ; KNL-NEXT:    retq
    902 ;
    903 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
    904 ; SKX:       ## BB#0:
    905 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    906 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
    907 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
    908 ; SKX-NEXT:    retq
    909   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    910   %x   = sext <2 x i16> %a to <2 x i64>
    911   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    912   ret <2 x i64> %ret
    913 }
    914 
    915 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <2 x i16> (align 1) to <2 x i64>.
        ; Both runs share one set of checks: a single vpmovsxwq from (%rdi) into xmm0.
    916 ; ALL-LABEL: sext_2x16mem_to_2x64:
    917 ; ALL:       ## BB#0:
    918 ; ALL-NEXT:    vpmovsxwq (%rdi), %xmm0
    919 ; ALL-NEXT:    retq
    920   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    921   %x   = sext <2 x i16> %a to <2 x i64>
    922   ret <2 x i64> %x
    923 }
    924 
    925 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <4 x i16> is loaded from %i (align 1), zero-extended
        ; to <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxwq into ymm0 under %k1; the knl run
        ; expects the mask sign-extended to ymm0 and combined with the result via vpand.
    926 ; KNL-LABEL: zext_4x16mem_to_4x64:
    927 ; KNL:       ## BB#0:
    928 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    929 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    930 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
    931 ; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    932 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
    933 ; KNL-NEXT:    retq
    934 ;
    935 ; SKX-LABEL: zext_4x16mem_to_4x64:
    936 ; SKX:       ## BB#0:
    937 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    938 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    939 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    940 ; SKX-NEXT:    retq
    941   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    942   %x   = zext <4 x i16> %a to <4 x i64>
    943   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    944   ret <4 x i64> %ret
    945 }
    946 
    947 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <4 x i16> is loaded from %i (align 1), sign-extended
        ; to <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxwq into ymm0 under %k1; the knl run
        ; expects the mask sign-extended to ymm0 and combined with the result via vpand.
    948 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
    949 ; KNL:       ## BB#0:
    950 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    951 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    952 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
    953 ; KNL-NEXT:    vpmovsxwq (%rdi), %ymm1
    954 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
    955 ; KNL-NEXT:    retq
    956 ;
    957 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
    958 ; SKX:       ## BB#0:
    959 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    960 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
    961 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
    962 ; SKX-NEXT:    retq
    963   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    964   %x   = sext <4 x i16> %a to <4 x i64>
    965   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    966   ret <4 x i64> %ret
    967 }
    968 
    969 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <4 x i16> (align 1) to <4 x i64>.
        ; Both runs share one set of checks: a single vpmovsxwq from (%rdi) into ymm0.
    970 ; ALL-LABEL: sext_4x16mem_to_4x64:
    971 ; ALL:       ## BB#0:
    972 ; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
    973 ; ALL-NEXT:    retq
    974   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    975   %x   = sext <4 x i16> %a to <4 x i64>
    976   ret <4 x i64> %x
    977 }
    978 
    979 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <8 x i16> is loaded from %i (align 1), zero-extended
        ; to <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovzxwq into zmm0 under %k1; they differ only
        ; in how %k1 is produced (vpmovsxwq/vpsllq/vptestmq on knl, vpsllw/vpmovw2m on skx).
    980 ; KNL-LABEL: zext_8x16mem_to_8x64:
    981 ; KNL:       ## BB#0:
    982 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    983 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    984 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    985 ; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    986 ; KNL-NEXT:    retq
    987 ;
    988 ; SKX-LABEL: zext_8x16mem_to_8x64:
    989 ; SKX:       ## BB#0:
    990 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    991 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    992 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    993 ; SKX-NEXT:    retq
    994   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    995   %x   = zext <8 x i16> %a to <8 x i64>
    996   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
    997   ret <8 x i64> %ret
    998 }
    999 
   1000 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <8 x i16> is loaded from %i (align 1), sign-extended
        ; to <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovsxwq from (%rdi) into zmm0 under %k1; they
        ; differ only in how %k1 is produced from the incoming xmm0 mask.
   1001 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
   1002 ; KNL:       ## BB#0:
   1003 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1004 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1005 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1006 ; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
   1007 ; KNL-NEXT:    retq
   1008 ;
   1009 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
   1010 ; SKX:       ## BB#0:
   1011 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1012 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1013 ; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
   1014 ; SKX-NEXT:    retq
   1015   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   1016   %x   = sext <8 x i16> %a to <8 x i64>
   1017   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1018   ret <8 x i64> %ret
   1019 }
   1020 
   1021 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <8 x i16> (align 1) to <8 x i64>.
        ; Both runs share one set of checks: a single vpmovsxwq from (%rdi) into zmm0.
   1022 ; ALL-LABEL: sext_8x16mem_to_8x64:
   1023 ; ALL:       ## BB#0:
   1024 ; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
   1025 ; ALL-NEXT:    retq
   1026   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   1027   %x   = sext <8 x i16> %a to <8 x i64>
   1028   ret <8 x i64> %x
   1029 }
   1030 
   1031 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extend of a register operand: %a (<8 x i16>) is zero-extended to
        ; <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The checks take the data from xmm0 and the mask from xmm1. Both runs expect a
        ; zero-masked vpmovzxwq into zmm0 under %k1 and differ only in how %k1 is produced.
   1032 ; KNL-LABEL: zext_8x16_to_8x64mask:
   1033 ; KNL:       ## BB#0:
   1034 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
   1035 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
   1036 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
   1037 ; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1038 ; KNL-NEXT:    retq
   1039 ;
   1040 ; SKX-LABEL: zext_8x16_to_8x64mask:
   1041 ; SKX:       ## BB#0:
   1042 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
   1043 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
   1044 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1045 ; SKX-NEXT:    retq
   1046   %x   = zext <8 x i16> %a to <8 x i64>
   1047   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1048   ret <8 x i64> %ret
   1049 }
   1050 
   1051 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
        ; Unmasked zero-extend of a register operand from <8 x i16> to <8 x i64>.
        ; Both runs share one set of checks: a single vpmovzxwq from xmm0 into zmm0.
   1052 ; ALL-LABEL: zext_8x16_to_8x64:
   1053 ; ALL:       ## BB#0:
   1054 ; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1055 ; ALL-NEXT:    retq
   1056   %ret   = zext <8 x i16> %a to <8 x i64>
   1057   ret <8 x i64> %ret
   1058 }
   1059 
   1060 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <2 x i32> is loaded from %i (align 1), zero-extended
        ; to <2 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxdq into xmm0 under %k1; the knl run
        ; expects a vector mask built in xmm0 and applied with vpand.
   1061 ; KNL-LABEL: zext_2x32mem_to_2x64:
   1062 ; KNL:       ## BB#0:
   1063 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
   1064 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1065 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
   1066 ; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
   1067 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
   1068 ; KNL-NEXT:    retq
   1069 ;
   1070 ; SKX-LABEL: zext_2x32mem_to_2x64:
   1071 ; SKX:       ## BB#0:
   1072 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
   1073 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
   1074 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
   1075 ; SKX-NEXT:    retq
   1076   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1077   %x   = zext <2 x i32> %a to <2 x i64>
   1078   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   1079   ret <2 x i64> %ret
   1080 }
   1081 
   1082 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <2 x i32> is loaded from %i (align 1), sign-extended
        ; to <2 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxdq into xmm0 under %k1; the knl run
        ; expects a vector mask built in xmm0 and applied with vpand.
   1083 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
   1084 ; KNL:       ## BB#0:
   1085 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
   1086 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1087 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
   1088 ; KNL-NEXT:    vpmovsxdq (%rdi), %xmm1
   1089 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
   1090 ; KNL-NEXT:    retq
   1091 ;
   1092 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
   1093 ; SKX:       ## BB#0:
   1094 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
   1095 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k1
   1096 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
   1097 ; SKX-NEXT:    retq
   1098   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1099   %x   = sext <2 x i32> %a to <2 x i64>
   1100   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   1101   ret <2 x i64> %ret
   1102 }
   1103 
   1104 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <2 x i32> (align 1) to <2 x i64>.
        ; Both runs share one set of checks: a single vpmovsxdq from (%rdi) into xmm0.
   1105 ; ALL-LABEL: sext_2x32mem_to_2x64:
   1106 ; ALL:       ## BB#0:
   1107 ; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
   1108 ; ALL-NEXT:    retq
   1109   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1110   %x   = sext <2 x i32> %a to <2 x i64>
   1111   ret <2 x i64> %x
   1112 }
   1113 
   1114 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <4 x i32> is loaded from %i (align 1), zero-extended
        ; to <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovzxdq into ymm0 under %k1; the knl run
        ; expects the mask sign-extended to ymm0 and combined with the result via vpand.
   1115 ; KNL-LABEL: zext_4x32mem_to_4x64:
   1116 ; KNL:       ## BB#0:
   1117 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1118 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1119 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
   1120 ; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1121 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1122 ; KNL-NEXT:    retq
   1123 ;
   1124 ; SKX-LABEL: zext_4x32mem_to_4x64:
   1125 ; SKX:       ## BB#0:
   1126 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1127 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
   1128 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1129 ; SKX-NEXT:    retq
   1130   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1131   %x   = zext <4 x i32> %a to <4 x i64>
   1132   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1133   ret <4 x i64> %ret
   1134 }
   1135 
   1136 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
        ; Masked sign-extending load: <4 x i32> is loaded from %i (align 1), sign-extended
        ; to <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The skx run expects a zero-masked vpmovsxdq into ymm0 under %k1; the knl run
        ; expects the mask sign-extended to ymm0 and combined with the result via vpand.
   1137 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
   1138 ; KNL:       ## BB#0:
   1139 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1140 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1141 ; KNL-NEXT:    vpmovsxdq %xmm0, %ymm0
   1142 ; KNL-NEXT:    vpmovsxdq (%rdi), %ymm1
   1143 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1144 ; KNL-NEXT:    retq
   1145 ;
   1146 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
   1147 ; SKX:       ## BB#0:
   1148 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1149 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
   1150 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
   1151 ; SKX-NEXT:    retq
   1152   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1153   %x   = sext <4 x i32> %a to <4 x i64>
   1154   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1155   ret <4 x i64> %ret
   1156 }
   1157 
   1158 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
        ; Unmasked sign-extending load of <4 x i32> (align 1) to <4 x i64>.
        ; Both runs share one set of checks: a single vpmovsxdq from (%rdi) into ymm0.
   1159 ; ALL-LABEL: sext_4x32mem_to_4x64:
   1160 ; ALL:       ## BB#0:
   1161 ; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
   1162 ; ALL-NEXT:    retq
   1163   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1164   %x   = sext <4 x i32> %a to <4 x i64>
   1165   ret <4 x i64> %x
   1166 }
   1167 
   1168 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
        ; Unmasked sign-extend of a register operand from <4 x i32> to <4 x i64>.
        ; Both runs share one set of checks: a single vpmovsxdq from xmm0 into ymm0.
   1169 ; ALL-LABEL: sext_4x32_to_4x64:
   1170 ; ALL:       ## BB#0:
   1171 ; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
   1172 ; ALL-NEXT:    retq
   1173   %x   = sext <4 x i32> %a to <4 x i64>
   1174   ret <4 x i64> %x
   1175 }
   1176 
   1177 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
        ; Masked zero-extend of a register operand: %a (<4 x i32>) is zero-extended to
        ; <4 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; The checks take the data from xmm0 and the mask from xmm1. The skx run expects
        ; a zero-masked vpmovzxdq under %k1; the knl run expects the mask sign-extended
        ; to ymm1 and combined with the unmasked vpmovzxdq result via vpand.
   1178 ; KNL-LABEL: zext_4x32_to_4x64mask:
   1179 ; KNL:       ## BB#0:
   1180 ; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
   1181 ; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
   1182 ; KNL-NEXT:    vpmovsxdq %xmm1, %ymm1
   1183 ; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1184 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
   1185 ; KNL-NEXT:    retq
   1186 ;
   1187 ; SKX-LABEL: zext_4x32_to_4x64mask:
   1188 ; SKX:       ## BB#0:
   1189 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
   1190 ; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
   1191 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1192 ; SKX-NEXT:    retq
   1193   %x   = zext <4 x i32> %a to <4 x i64>
   1194   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1195   ret <4 x i64> %ret
   1196 }
   1197 
   1198 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
        ; Masked zero-extending load: <8 x i32> is loaded from %i (align 1), zero-extended
        ; to <8 x i64>, and lanes whose %mask bit is false are replaced by zero.
        ; Both runs expect a zero-masked vpmovzxdq into zmm0 under %k1; they differ only
        ; in how %k1 is produced (vpmovsxwq/vpsllq/vptestmq on knl, vpsllw/vpmovw2m on skx).
   1199 ; KNL-LABEL: zext_8x32mem_to_8x64:
   1200 ; KNL:       ## BB#0:
   1201 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1202 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1203 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1204 ; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
   1205 ; KNL-NEXT:    retq
   1206 ;
   1207 ; SKX-LABEL: zext_8x32mem_to_8x64:
   1208 ; SKX:       ## BB#0:
   1209 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1210 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1211 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
   1212 ; SKX-NEXT:    retq
   1213   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1214   %x   = zext <8 x i32> %a to <8 x i64>
   1215   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1216   ret <8 x i64> %ret
   1217 }
   1218 
   ; Masked sign extension of a <8 x i32> loaded (align 1) through %i to
   ; <8 x i64>; lanes whose mask element is false become zero. Both targets are
   ; expected to fold the load into a zero-masked vpmovsxdq from (%rdi); only the
   ; construction of %k1 from the <8 x i1> argument differs.
   ; NOTE(review): declared readnone although the body loads through %i -
   ; confirm the attribute is intended.
   1219 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
   1220 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
   1221 ; KNL:       ## BB#0:
   1222 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1223 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1224 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1225 ; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
   1226 ; KNL-NEXT:    retq
   1227 ;
   1228 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
   1229 ; SKX:       ## BB#0:
   1230 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1231 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1232 ; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
   1233 ; SKX-NEXT:    retq
   1234   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1235   %x   = sext <8 x i32> %a to <8 x i64>
   1236   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1237   ret <8 x i64> %ret
   1238 }
   1239 
   ; Unmasked sign extension of a <8 x i32> loaded (align 1) through %i to
   ; <8 x i64>. Both targets are expected to fold the load into one vpmovsxdq
   ; with a memory source.
   ; NOTE(review): declared readnone although the body loads through %i -
   ; confirm the attribute is intended.
   1240 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
   1241 ; ALL-LABEL: sext_8x32mem_to_8x64:
   1242 ; ALL:       ## BB#0:
   1243 ; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
   1244 ; ALL-NEXT:    retq
   1245   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1246   %x   = sext <8 x i32> %a to <8 x i64>
   1247   ret <8 x i64> %x
   1248 }
   1249 
   ; Register-source sign extension <8 x i32> -> <8 x i64>. The shared checks
   ; expect a single vpmovsxdq (ymm -> zmm) on both KNL and SKX.
   1250 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
   1251 ; ALL-LABEL: sext_8x32_to_8x64:
   1252 ; ALL:       ## BB#0:
   1253 ; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
   1254 ; ALL-NEXT:    retq
   1255   %x   = sext <8 x i32> %a to <8 x i64>
   1256   ret <8 x i64> %x
   1257 }
   1258 
   ; Masked zero extension <8 x i32> -> <8 x i64> from a register source; lanes
   ; whose mask element is false become zero. Both targets are expected to end
   ; in a zero-masked vpmovzxdq (ymm -> zmm); KNL builds %k1 with
   ; vpmovsxwq/vpsllq/vptestmq, SKX with vpsllw/vpmovw2m.
   1259 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
   1260 ; KNL-LABEL: zext_8x32_to_8x64mask:
   1261 ; KNL:       ## BB#0:
   1262 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
   1263 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
   1264 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
   1265 ; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   1266 ; KNL-NEXT:    retq
   1267 ;
   1268 ; SKX-LABEL: zext_8x32_to_8x64mask:
   1269 ; SKX:       ## BB#0:
   1270 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
   1271 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
   1272 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   1273 ; SKX-NEXT:    retq
   1274   %x   = zext <8 x i32> %a to <8 x i64>
   1275   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1276   ret <8 x i64> %ret
   1277 }
   ; Floating-point truncation <8 x double> -> <8 x float>. The shared checks
   ; expect a single vcvtpd2ps (zmm -> ymm) on both KNL and SKX.
   1278 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
   1279 ; ALL-LABEL: fptrunc_test:
   1280 ; ALL:       ## BB#0:
   1281 ; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
   1282 ; ALL-NEXT:    retq
   1283   %b = fptrunc <8 x double> %a to <8 x float>
   1284   ret <8 x float> %b
   1285 }
   1286 
   ; Floating-point extension <8 x float> -> <8 x double>. The shared checks
   ; expect a single vcvtps2pd (ymm -> zmm) on both KNL and SKX.
   1287 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
   1288 ; ALL-LABEL: fpext_test:
   1289 ; ALL:       ## BB#0:
   1290 ; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
   1291 ; ALL-NEXT:    retq
   1292   %b = fpext <8 x float> %a to <8 x double>
   1293   ret <8 x double> %b
   1294 }
   1295 
   ; Reinterprets an i16 as a <16 x i1> mask and zero-extends it to <16 x i32>.
   ; Both targets are expected to move the scalar into %k1 with kmovw and then
   ; materialize the result with a zero-masked vpbroadcastd from a RIP-relative
   ; constant.
   1296 define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
   1297 ; ALL-LABEL: zext_16i1_to_16xi32:
   1298 ; ALL:       ## BB#0:
   1299 ; ALL-NEXT:    kmovw %edi, %k1
   1300 ; ALL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
   1301 ; ALL-NEXT:    retq
   1302   %a = bitcast i16 %b to <16 x i1>
   1303   %c = zext <16 x i1> %a to <16 x i32>
   1304   ret <16 x i32> %c
   1305 }
   1306 
   ; Reinterprets an i8 as a <8 x i1> mask and zero-extends it to <8 x i64>.
   ; Both targets finish with a zero-masked vpbroadcastq from a RIP-relative
   ; constant; the expected mask move is kmovw on KNL and kmovb on SKX.
   1307 define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
   1308 ; KNL-LABEL: zext_8i1_to_8xi64:
   1309 ; KNL:       ## BB#0:
   1310 ; KNL-NEXT:    kmovw %edi, %k1
   1311 ; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
   1312 ; KNL-NEXT:    retq
   1313 ;
   1314 ; SKX-LABEL: zext_8i1_to_8xi64:
   1315 ; SKX:       ## BB#0:
   1316 ; SKX-NEXT:    kmovb %edi, %k1
   1317 ; SKX-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
   1318 ; SKX-NEXT:    retq
   1319   %a = bitcast i8 %b to <8 x i1>
   1320   %c = zext <8 x i1> %a to <8 x i64>
   1321   ret <8 x i64> %c
   1322 }
   1323 
   ; Truncates each lane of a <16 x i8> to i1 and returns the resulting mask as
   ; an i16. The KNL checks first widen to zmm (vpmovsxbd) and test the shifted
   ; low bit with vpslld $31 + vptestmd; the SKX checks use vpsllw $7 followed
   ; by vpmovb2m directly on xmm.
   1324 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
   1325 ; KNL-LABEL: trunc_16i8_to_16i1:
   1326 ; KNL:       ## BB#0:
   1327 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1328 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1329 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1330 ; KNL-NEXT:    kmovw %k0, %eax
   1331 ; KNL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   1332 ; KNL-NEXT:    retq
   1333 ;
   1334 ; SKX-LABEL: trunc_16i8_to_16i1:
   1335 ; SKX:       ## BB#0:
   1336 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
   1337 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
   1338 ; SKX-NEXT:    kmovw %k0, %eax
   1339 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   1340 ; SKX-NEXT:    retq
   1341   %mask_b = trunc <16 x i8>%a to <16 x i1>
   1342   %mask = bitcast <16 x i1> %mask_b to i16
   1343   ret i16 %mask
   1344 }
   1345 
   ; Truncates each lane of a <16 x i32> to i1 and returns the resulting mask as
   ; an i16. Both targets share one expected sequence: vpslld $31 moves the low
   ; bit to the sign position, vptestmd produces %k0, and kmovw copies it to
   ; %eax.
   1346 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
   1347 ; ALL-LABEL: trunc_16i32_to_16i1:
   1348 ; ALL:       ## BB#0:
   1349 ; ALL-NEXT:    vpslld $31, %zmm0, %zmm0
   1350 ; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1351 ; ALL-NEXT:    kmovw %k0, %eax
   1352 ; ALL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   1353 ; ALL-NEXT:    retq
   1354   %mask_b = trunc <16 x i32>%a to <16 x i1>
   1355   %mask = bitcast <16 x i1> %mask_b to i16
   1356   ret i16 %mask
   1357 }
   1358 
   ; Truncates two <4 x i32> inputs to <4 x i1>, ANDs the masks, and
   ; sign-extends the result back to <4 x i32>. The KNL checks stay entirely in
   ; xmm registers (vpand, then vpslld/vpsrad by 31); the SKX checks go through
   ; mask registers, with the second vptestmd predicated on %k1 to perform the
   ; AND, and vpmovm2d to expand the mask.
   1359 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
   1360 ; KNL-LABEL: trunc_4i32_to_4i1:
   1361 ; KNL:       ## BB#0:
   1362 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
   1363 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1364 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1365 ; KNL-NEXT:    retq
   1366 ;
   1367 ; SKX-LABEL: trunc_4i32_to_4i1:
   1368 ; SKX:       ## BB#0:
   1369 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1370 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1
   1371 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
   1372 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 {%k1}
   1373 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
   1374 ; SKX-NEXT:    retq
   1375   %mask_a = trunc <4 x i32>%a to <4 x i1>
   1376   %mask_b = trunc <4 x i32>%b to <4 x i1>
   1377   %a_and_b = and <4 x i1>%mask_a, %mask_b
   1378   %res = sext <4 x i1>%a_and_b to <4 x i32>
   1379   ret <4 x i32>%res
   1380 }
   1381 
   1382 
   ; Truncates each lane of a <8 x i16> to i1 and returns the resulting mask as
   ; an i8. The KNL checks widen to zmm (vpmovsxwq), shift by 63 and use
   ; vptestmq, reading the mask with kmovw; the SKX checks use vpsllw $15 +
   ; vpmovw2m and read the mask with kmovb.
   1383 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
   1384 ; KNL-LABEL: trunc_8i16_to_8i1:
   1385 ; KNL:       ## BB#0:
   1386 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1387 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1388 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   1389 ; KNL-NEXT:    kmovw %k0, %eax
   1390 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   1391 ; KNL-NEXT:    retq
   1392 ;
   1393 ; SKX-LABEL: trunc_8i16_to_8i1:
   1394 ; SKX:       ## BB#0:
   1395 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1396 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   1397 ; SKX-NEXT:    kmovb %k0, %eax
   1398 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   1399 ; SKX-NEXT:    retq
   1400   %mask_b = trunc <8 x i16>%a to <8 x i1>
   1401   %mask = bitcast <8 x i1> %mask_b to i8
   1402   ret i8 %mask
   1403 }
   1404 
   ; Compares two <8 x i32> values with signed less-than, inverts the <8 x i1>
   ; result (xor with all-true), and sign-extends it to <8 x i32>. The KNL
   ; checks do the compare on zmm, invert with knotw, build all-ones lanes under
   ; a zero mask and narrow with vpmovqd; the SKX checks compare on ymm, invert
   ; with knotb and expand the mask with vpmovm2d.
   1405 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1406 ; KNL-LABEL: sext_8i1_8i32:
   1407 ; KNL:       ## BB#0:
   1408 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
   1409 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
   1410 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
   1411 ; KNL-NEXT:    knotw %k0, %k1
   1412 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1413 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1414 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
   1415 ; KNL-NEXT:    retq
   1416 ;
   1417 ; SKX-LABEL: sext_8i1_8i32:
   1418 ; SKX:       ## BB#0:
   1419 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
   1420 ; SKX-NEXT:    knotb %k0, %k0
   1421 ; SKX-NEXT:    vpmovm2d %k0, %ymm0
   1422 ; SKX-NEXT:    retq
   1423   %x = icmp slt <8 x i32> %a1, %a2
   1424   %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
   1425   %y = sext <8 x i1> %x1 to <8 x i32>
   1426   ret <8 x i32> %y
   1427 }
   1428 
   1429 
   ; Truncates a scalar i32 to i1, inserts it as element 0 of a constant
   ; <16 x i1> whose element 1 is false and whose remaining elements are true,
   ; and returns the vector as an i16. Both targets are expected to isolate the
   ; low bit with andl $1, load the constant part as -4 through %ax, and merge
   ; the two mask registers with korw.
   1430 define i16 @trunc_i32_to_i1(i32 %a) {
   1431 ; ALL-LABEL: trunc_i32_to_i1:
   1432 ; ALL:       ## BB#0:
   1433 ; ALL-NEXT:    andl $1, %edi
   1434 ; ALL-NEXT:    kmovw %edi, %k0
   1435 ; ALL-NEXT:    movw $-4, %ax
   1436 ; ALL-NEXT:    kmovw %eax, %k1
   1437 ; ALL-NEXT:    korw %k0, %k1, %k0
   1438 ; ALL-NEXT:    kmovw %k0, %eax
   1439 ; ALL-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   1440 ; ALL-NEXT:    retq
   1441   %a_i = trunc i32 %a to i1
   1442   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
   1443   %res = bitcast <16 x i1> %maskv to i16
   1444   ret i16 %res
   1445 }
   1446 
   ; Sign-extends the <8 x i1> result of a signed less-than compare of two
   ; <8 x i32> values to <8 x i16>. The KNL checks compare into a ymm vector and
   ; narrow it with vpmovdw; the SKX checks compare into %k0 and expand it with
   ; vpmovm2w.
   1447 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1448 ; KNL-LABEL: sext_8i1_8i16:
   1449 ; KNL:       ## BB#0:
   1450 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1451 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
   1452 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
   1453 ; KNL-NEXT:    retq
   1454 ;
   1455 ; SKX-LABEL: sext_8i1_8i16:
   1456 ; SKX:       ## BB#0:
   1457 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
   1458 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
   1459 ; SKX-NEXT:    retq
   1460   %x = icmp slt <8 x i32> %a1, %a2
   1461   %y = sext <8 x i1> %x to <8 x i16>
   1462   ret <8 x i16> %y
   1463 }
   1464 
   ; Sign-extends the <16 x i1> result of a signed less-than compare of two
   ; <16 x i32> values to <16 x i32>. The KNL checks compare into %k1 and build
   ; the result from an all-ones register (vpternlogd $255) under a zero mask;
   ; the SKX checks compare into %k0 and expand it with vpmovm2d.
   1465 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
   1466 ; KNL-LABEL: sext_16i1_16i32:
   1467 ; KNL:       ## BB#0:
   1468 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
   1469 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1470 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
   1471 ; KNL-NEXT:    retq
   1472 ;
   1473 ; SKX-LABEL: sext_16i1_16i32:
   1474 ; SKX:       ## BB#0:
   1475 ; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
   1476 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
   1477 ; SKX-NEXT:    retq
   1478   %x = icmp slt <16 x i32> %a1, %a2
   1479   %y = sext <16 x i1> %x to <16 x i32>
   1480   ret <16 x i32> %y
   1481 }
   1482 
   ; Sign-extends the <8 x i1> result of a signed less-than compare of two
   ; <8 x i32> values to <8 x i64>. The KNL checks compare into a ymm vector and
   ; widen it with vpmovsxdq; the SKX checks compare into %k0 and expand it with
   ; vpmovm2q.
   1483 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1484 ; KNL-LABEL: sext_8i1_8i64:
   1485 ; KNL:       ## BB#0:
   1486 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1487 ; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
   1488 ; KNL-NEXT:    retq
   1489 ;
   1490 ; SKX-LABEL: sext_8i1_8i64:
   1491 ; SKX:       ## BB#0:
   1492 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
   1493 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
   1494 ; SKX-NEXT:    retq
   1495   %x = icmp slt <8 x i32> %a1, %a2
   1496   %y = sext <8 x i1> %x to <8 x i64>
   1497   ret <8 x i64> %y
   1498 }
   1499 
   ; Loads a <8 x i8> through %a, sign-extends it to <8 x i64>, and stores the
   ; result through %res. Both targets are expected to fold the load into
   ; vpmovsxbq with a memory source and store the zmm result with vmovdqa64.
   1500 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
   1501 ; ALL-LABEL: extload_v8i64:
   1502 ; ALL:       ## BB#0:
   1503 ; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
   1504 ; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
   1505 ; ALL-NEXT:    retq
   1506   %sign_load = load <8 x i8>, <8 x i8>* %a
   1507   %c = sext <8 x i8> %sign_load to <8 x i64>
   1508   store <8 x i64> %c, <8 x i64>* %res
   1509   ret void
   1510 }
   1511 
   ; Selects between a <64 x i16> value and zero under a <64 x i1> mask argument.
   ; The KNL checks show a long scalarized sequence: each mask bit is isolated
   ; with kshiftlw/kshiftrw, moved to a GPR with kmovw, reassembled into byte
   ; vectors with vpinsrb, then widened (vpmovzxbw + vpsllw/vpsraw) and ANDed
   ; with each ymm quarter of %x. The SKX checks need only vpmovb2m plus two
   ; zero-masked vmovdqu16, with kshiftrq $32 exposing the upper 32 mask bits.
   1512 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
   1513 ; KNL-LABEL: test21:
   1514 ; KNL:       ## BB#0:
   1515 ; KNL-NEXT:    pushq %rbp
   1516 ; KNL-NEXT:    pushq %r15
   1517 ; KNL-NEXT:    pushq %r14
   1518 ; KNL-NEXT:    pushq %r13
   1519 ; KNL-NEXT:    pushq %r12
   1520 ; KNL-NEXT:    pushq %rbx
   1521 ; KNL-NEXT:    vpmovsxbd %xmm7, %zmm7
   1522 ; KNL-NEXT:    vpslld $31, %zmm7, %zmm7
   1523 ; KNL-NEXT:    vpmovsxbd %xmm6, %zmm6
   1524 ; KNL-NEXT:    vpslld $31, %zmm6, %zmm6
   1525 ; KNL-NEXT:    vpmovsxbd %xmm5, %zmm5
   1526 ; KNL-NEXT:    vpslld $31, %zmm5, %zmm5
   1527 ; KNL-NEXT:    vpmovsxbd %xmm4, %zmm4
   1528 ; KNL-NEXT:    vpslld $31, %zmm4, %zmm4
   1529 ; KNL-NEXT:    vptestmd %zmm4, %zmm4, %k0
   1530 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1531 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1532 ; KNL-NEXT:    kmovw %k1, %ecx
   1533 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1534 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1535 ; KNL-NEXT:    kmovw %k1, %r15d
   1536 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1537 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1538 ; KNL-NEXT:    kmovw %k1, %r12d
   1539 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1540 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1541 ; KNL-NEXT:    kmovw %k1, %edx
   1542 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1543 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1544 ; KNL-NEXT:    kmovw %k1, %r13d
   1545 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1546 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1547 ; KNL-NEXT:    kmovw %k1, %eax
   1548 ; KNL-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
   1549 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1550 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1551 ; KNL-NEXT:    kmovw %k1, %esi
   1552 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1553 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1554 ; KNL-NEXT:    kmovw %k1, %edi
   1555 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1556 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1557 ; KNL-NEXT:    kmovw %k1, %r8d
   1558 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1559 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1560 ; KNL-NEXT:    kmovw %k1, %r9d
   1561 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1562 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1563 ; KNL-NEXT:    kmovw %k1, %r10d
   1564 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1565 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1566 ; KNL-NEXT:    kmovw %k1, %r11d
   1567 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1568 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1569 ; KNL-NEXT:    kmovw %k1, %ebx
   1570 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1571 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1572 ; KNL-NEXT:    kmovw %k1, %ebp
   1573 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1574 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1575 ; KNL-NEXT:    kmovw %k1, %r14d
   1576 ; KNL-NEXT:    vptestmd %zmm5, %zmm5, %k2
   1577 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1578 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1579 ; KNL-NEXT:    vmovd %r15d, %xmm4
   1580 ; KNL-NEXT:    kmovw %k0, %r15d
   1581 ; KNL-NEXT:    kshiftlw $14, %k2, %k0
   1582 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1583 ; KNL-NEXT:    vpinsrb $1, %ecx, %xmm4, %xmm4
   1584 ; KNL-NEXT:    kmovw %k0, %ecx
   1585 ; KNL-NEXT:    kshiftlw $15, %k2, %k0
   1586 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1587 ; KNL-NEXT:    vpinsrb $2, %r12d, %xmm4, %xmm4
   1588 ; KNL-NEXT:    kmovw %k0, %eax
   1589 ; KNL-NEXT:    kshiftlw $13, %k2, %k0
   1590 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1591 ; KNL-NEXT:    vpinsrb $3, %edx, %xmm4, %xmm4
   1592 ; KNL-NEXT:    kmovw %k0, %r12d
   1593 ; KNL-NEXT:    kshiftlw $12, %k2, %k0
   1594 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1595 ; KNL-NEXT:    vpinsrb $4, %r13d, %xmm4, %xmm4
   1596 ; KNL-NEXT:    kmovw %k0, %edx
   1597 ; KNL-NEXT:    kshiftlw $11, %k2, %k0
   1598 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1599 ; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
   1600 ; KNL-NEXT:    kmovw %k0, %r13d
   1601 ; KNL-NEXT:    kshiftlw $10, %k2, %k0
   1602 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1603 ; KNL-NEXT:    vpinsrb $6, %esi, %xmm4, %xmm4
   1604 ; KNL-NEXT:    kmovw %k0, %esi
   1605 ; KNL-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
   1606 ; KNL-NEXT:    kshiftlw $9, %k2, %k0
   1607 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1608 ; KNL-NEXT:    vpinsrb $7, %edi, %xmm4, %xmm4
   1609 ; KNL-NEXT:    kmovw %k0, %esi
   1610 ; KNL-NEXT:    kshiftlw $8, %k2, %k0
   1611 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1612 ; KNL-NEXT:    vpinsrb $8, %r8d, %xmm4, %xmm4
   1613 ; KNL-NEXT:    kmovw %k0, %edi
   1614 ; KNL-NEXT:    kshiftlw $7, %k2, %k0
   1615 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1616 ; KNL-NEXT:    vpinsrb $9, %r9d, %xmm4, %xmm4
   1617 ; KNL-NEXT:    kmovw %k0, %r8d
   1618 ; KNL-NEXT:    kshiftlw $6, %k2, %k0
   1619 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1620 ; KNL-NEXT:    vpinsrb $10, %r10d, %xmm4, %xmm4
   1621 ; KNL-NEXT:    kmovw %k0, %r9d
   1622 ; KNL-NEXT:    kshiftlw $5, %k2, %k0
   1623 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1624 ; KNL-NEXT:    vpinsrb $11, %r11d, %xmm4, %xmm4
   1625 ; KNL-NEXT:    kmovw %k0, %r10d
   1626 ; KNL-NEXT:    kshiftlw $4, %k2, %k0
   1627 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1628 ; KNL-NEXT:    vpinsrb $12, %ebx, %xmm4, %xmm4
   1629 ; KNL-NEXT:    kmovw %k0, %ebx
   1630 ; KNL-NEXT:    kshiftlw $3, %k2, %k0
   1631 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1632 ; KNL-NEXT:    vpinsrb $13, %ebp, %xmm4, %xmm4
   1633 ; KNL-NEXT:    kmovw %k0, %ebp
   1634 ; KNL-NEXT:    kshiftlw $2, %k2, %k0
   1635 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1636 ; KNL-NEXT:    vpinsrb $14, %r14d, %xmm4, %xmm4
   1637 ; KNL-NEXT:    kmovw %k0, %r11d
   1638 ; KNL-NEXT:    kshiftlw $1, %k2, %k0
   1639 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1640 ; KNL-NEXT:    vpinsrb $15, %r15d, %xmm4, %xmm4
   1641 ; KNL-NEXT:    kmovw %k0, %r14d
   1642 ; KNL-NEXT:    vptestmd %zmm6, %zmm6, %k1
   1643 ; KNL-NEXT:    kshiftlw $0, %k2, %k0
   1644 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1645 ; KNL-NEXT:    vmovd %eax, %xmm5
   1646 ; KNL-NEXT:    kmovw %k0, %r15d
   1647 ; KNL-NEXT:    kshiftlw $14, %k1, %k0
   1648 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1649 ; KNL-NEXT:    vpinsrb $1, %ecx, %xmm5, %xmm5
   1650 ; KNL-NEXT:    kmovw %k0, %ecx
   1651 ; KNL-NEXT:    kshiftlw $15, %k1, %k0
   1652 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1653 ; KNL-NEXT:    vpinsrb $2, %r12d, %xmm5, %xmm5
   1654 ; KNL-NEXT:    kmovw %k0, %eax
   1655 ; KNL-NEXT:    kshiftlw $13, %k1, %k0
   1656 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1657 ; KNL-NEXT:    vpinsrb $3, %edx, %xmm5, %xmm5
   1658 ; KNL-NEXT:    kmovw %k0, %r12d
   1659 ; KNL-NEXT:    kshiftlw $12, %k1, %k0
   1660 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1661 ; KNL-NEXT:    vpinsrb $4, %r13d, %xmm5, %xmm5
   1662 ; KNL-NEXT:    kmovw %k0, %edx
   1663 ; KNL-NEXT:    kshiftlw $11, %k1, %k0
   1664 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1665 ; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
   1666 ; KNL-NEXT:    kmovw %k0, %r13d
   1667 ; KNL-NEXT:    kshiftlw $10, %k1, %k0
   1668 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1669 ; KNL-NEXT:    vpinsrb $6, %esi, %xmm5, %xmm5
   1670 ; KNL-NEXT:    kmovw %k0, %esi
   1671 ; KNL-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
   1672 ; KNL-NEXT:    kshiftlw $9, %k1, %k0
   1673 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1674 ; KNL-NEXT:    vpinsrb $7, %edi, %xmm5, %xmm5
   1675 ; KNL-NEXT:    kmovw %k0, %esi
   1676 ; KNL-NEXT:    kshiftlw $8, %k1, %k0
   1677 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1678 ; KNL-NEXT:    vpinsrb $8, %r8d, %xmm5, %xmm5
   1679 ; KNL-NEXT:    kmovw %k0, %edi
   1680 ; KNL-NEXT:    kshiftlw $7, %k1, %k0
   1681 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1682 ; KNL-NEXT:    vpinsrb $9, %r9d, %xmm5, %xmm5
   1683 ; KNL-NEXT:    kmovw %k0, %r8d
   1684 ; KNL-NEXT:    kshiftlw $6, %k1, %k0
   1685 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1686 ; KNL-NEXT:    vpinsrb $10, %r10d, %xmm5, %xmm5
   1687 ; KNL-NEXT:    kmovw %k0, %r9d
   1688 ; KNL-NEXT:    kshiftlw $5, %k1, %k0
   1689 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1690 ; KNL-NEXT:    vpinsrb $11, %ebx, %xmm5, %xmm5
   1691 ; KNL-NEXT:    kmovw %k0, %ebx
   1692 ; KNL-NEXT:    kshiftlw $4, %k1, %k0
   1693 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1694 ; KNL-NEXT:    vpinsrb $12, %ebp, %xmm5, %xmm5
   1695 ; KNL-NEXT:    kmovw %k0, %ebp
   1696 ; KNL-NEXT:    kshiftlw $3, %k1, %k0
   1697 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1698 ; KNL-NEXT:    vpinsrb $13, %r11d, %xmm5, %xmm5
   1699 ; KNL-NEXT:    kmovw %k0, %r10d
   1700 ; KNL-NEXT:    kshiftlw $2, %k1, %k0
   1701 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1702 ; KNL-NEXT:    vpinsrb $14, %r14d, %xmm5, %xmm5
   1703 ; KNL-NEXT:    kmovw %k0, %r11d
   1704 ; KNL-NEXT:    kshiftlw $1, %k1, %k0
   1705 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1706 ; KNL-NEXT:    vpinsrb $15, %r15d, %xmm5, %xmm5
   1707 ; KNL-NEXT:    kmovw %k0, %r14d
   1708 ; KNL-NEXT:    vptestmd %zmm7, %zmm7, %k0
   1709 ; KNL-NEXT:    kshiftlw $0, %k1, %k1
   1710 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1711 ; KNL-NEXT:    vmovd %eax, %xmm6
   1712 ; KNL-NEXT:    kmovw %k1, %r15d
   1713 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1714 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1715 ; KNL-NEXT:    vpinsrb $1, %ecx, %xmm6, %xmm6
   1716 ; KNL-NEXT:    kmovw %k1, %ecx
   1717 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1718 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1719 ; KNL-NEXT:    vpinsrb $2, %r12d, %xmm6, %xmm6
   1720 ; KNL-NEXT:    kmovw %k1, %r12d
   1721 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1722 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1723 ; KNL-NEXT:    vpinsrb $3, %edx, %xmm6, %xmm6
   1724 ; KNL-NEXT:    kmovw %k1, %edx
   1725 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1726 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1727 ; KNL-NEXT:    vpinsrb $4, %r13d, %xmm6, %xmm6
   1728 ; KNL-NEXT:    kmovw %k1, %r13d
   1729 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1730 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1731 ; KNL-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
   1732 ; KNL-NEXT:    kmovw %k1, %eax
   1733 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1734 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1735 ; KNL-NEXT:    vpinsrb $6, %esi, %xmm6, %xmm6
   1736 ; KNL-NEXT:    kmovw %k1, %esi
   1737 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1738 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1739 ; KNL-NEXT:    vpinsrb $7, %edi, %xmm6, %xmm6
   1740 ; KNL-NEXT:    kmovw %k1, %edi
   1741 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1742 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1743 ; KNL-NEXT:    vpinsrb $8, %r8d, %xmm6, %xmm6
   1744 ; KNL-NEXT:    kmovw %k1, %r8d
   1745 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1746 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1747 ; KNL-NEXT:    vpinsrb $9, %r9d, %xmm6, %xmm6
   1748 ; KNL-NEXT:    kmovw %k1, %r9d
   1749 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1750 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1751 ; KNL-NEXT:    vpinsrb $10, %ebx, %xmm6, %xmm6
   1752 ; KNL-NEXT:    kmovw %k1, %ebx
   1753 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1754 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1755 ; KNL-NEXT:    vpinsrb $11, %ebp, %xmm6, %xmm6
   1756 ; KNL-NEXT:    kmovw %k1, %ebp
   1757 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1758 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1759 ; KNL-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
   1760 ; KNL-NEXT:    kmovw %k1, %r10d
   1761 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1762 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1763 ; KNL-NEXT:    vpinsrb $13, %r11d, %xmm6, %xmm6
   1764 ; KNL-NEXT:    kmovw %k1, %r11d
   1765 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1766 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1767 ; KNL-NEXT:    vpinsrb $14, %r14d, %xmm6, %xmm6
   1768 ; KNL-NEXT:    kmovw %k1, %r14d
   1769 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1770 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1771 ; KNL-NEXT:    vpinsrb $15, %r15d, %xmm6, %xmm6
   1772 ; KNL-NEXT:    kmovw %k1, %r15d
   1773 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1774 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1775 ; KNL-NEXT:    vmovd %r12d, %xmm7
   1776 ; KNL-NEXT:    kmovw %k0, %r12d
   1777 ; KNL-NEXT:    vpinsrb $1, %ecx, %xmm7, %xmm7
   1778 ; KNL-NEXT:    vpinsrb $2, %edx, %xmm7, %xmm7
   1779 ; KNL-NEXT:    vpinsrb $3, %r13d, %xmm7, %xmm7
   1780 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm7, %xmm7
   1781 ; KNL-NEXT:    vpinsrb $5, %esi, %xmm7, %xmm7
   1782 ; KNL-NEXT:    vpinsrb $6, %edi, %xmm7, %xmm7
   1783 ; KNL-NEXT:    vpinsrb $7, %r8d, %xmm7, %xmm7
   1784 ; KNL-NEXT:    vpinsrb $8, %r9d, %xmm7, %xmm7
   1785 ; KNL-NEXT:    vpinsrb $9, %ebx, %xmm7, %xmm7
   1786 ; KNL-NEXT:    vpinsrb $10, %ebp, %xmm7, %xmm7
   1787 ; KNL-NEXT:    vpinsrb $11, %r10d, %xmm7, %xmm7
   1788 ; KNL-NEXT:    vpinsrb $12, %r11d, %xmm7, %xmm7
   1789 ; KNL-NEXT:    vpinsrb $13, %r14d, %xmm7, %xmm7
   1790 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
   1791 ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
   1792 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1793 ; KNL-NEXT:    vpand %ymm0, %ymm4, %ymm0
   1794 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
   1795 ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
   1796 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1797 ; KNL-NEXT:    vpand %ymm1, %ymm4, %ymm1
   1798 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
   1799 ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
   1800 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1801 ; KNL-NEXT:    vpand %ymm2, %ymm4, %ymm2
   1802 ; KNL-NEXT:    vpinsrb $14, %r15d, %xmm7, %xmm4
   1803 ; KNL-NEXT:    vpinsrb $15, %r12d, %xmm4, %xmm4
   1804 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
   1805 ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
   1806 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1807 ; KNL-NEXT:    vpand %ymm3, %ymm4, %ymm3
   1808 ; KNL-NEXT:    popq %rbx
   1809 ; KNL-NEXT:    popq %r12
   1810 ; KNL-NEXT:    popq %r13
   1811 ; KNL-NEXT:    popq %r14
   1812 ; KNL-NEXT:    popq %r15
   1813 ; KNL-NEXT:    popq %rbp
   1814 ; KNL-NEXT:    retq
   1815 ;
   1816 ; SKX-LABEL: test21:
   1817 ; SKX:       ## BB#0:
   1818 ; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2
   1819 ; SKX-NEXT:    vpmovb2m %zmm2, %k1
   1820 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
   1821 ; SKX-NEXT:    kshiftrq $32, %k1, %k1
   1822 ; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
   1823 ; SKX-NEXT:    retq
   1824   %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
   1825   ret <64 x i16> %ret
   1826 }
   1827 
   ; Zero extension written as a shuffle: each of the 16 bytes of %a is
   ; interleaved with a byte from the zero vector (mask index 16) and the
   ; <32 x i8> result is bitcast to <16 x i16>. Both targets are expected to
   ; recognize the pattern and emit one vpmovzxbw (xmm -> ymm).
   1828 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
   1829 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
   1830 ; ALL:       ## BB#0:
   1831 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1832 ; ALL-NEXT:    retq
   1833   %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
   1834   %2 = bitcast <32 x i8> %1 to <16 x i16>
   1835   ret <16 x i16> %2
   1836 }
   1837 
   ; Shuffle-based zero extension from a <32 x i8> source: only bytes 0..15 of
   ; %a are referenced, each interleaved with a zero byte (mask index 32), and
   ; the result is bitcast to <16 x i16>. Both targets are expected to emit one
   ; vpmovzxbw reading the low xmm half.
   1838 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
   1839 ; ALL-LABEL: zext_32x8_to_16x16:
   1840 ; ALL:       ## BB#0:
   1841 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1842 ; ALL-NEXT:    retq
   1843   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
   1844   %2 = bitcast <32 x i8> %1 to <16 x i16>
   1845   ret <16 x i16> %2
   1846 }
   1847 
   ; Shuffle-based zero extension: bytes 0..7 of %a are each followed by three
   ; zero bytes (mask index 32) and the result is bitcast to <8 x i32>. Both
   ; targets are expected to emit one vpmovzxbd reading the low xmm half.
   1848 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
   1849 ; ALL-LABEL: zext_32x8_to_8x32:
   1850 ; ALL:       ## BB#0:
   1851 ; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1852 ; ALL-NEXT:    retq
   1853   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
   1854   %2 = bitcast <32 x i8> %1 to <8 x i32>
   1855   ret <8 x i32> %2
   1856 }
   1857 
   ; Shuffle-based zero extension: bytes 0..3 of %a are each followed by seven
   ; zero bytes (mask index 32) and the result is bitcast to <4 x i64>. Both
   ; targets are expected to emit one vpmovzxbq reading the low xmm half.
   1858 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
   1859 ; ALL-LABEL: zext_32x8_to_4x64:
   1860 ; ALL:       ## BB#0:
   1861 ; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1862 ; ALL-NEXT:    retq
   1863   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
   1864   %2 = bitcast <32 x i8> %1 to <4 x i64>
   1865   ret <4 x i64> %2
   1866 }
   1867 
   ; Shuffle-based zero extension: i16 elements 0..7 of %a are interleaved with
   ; zero elements (mask index 16) and the result is bitcast to <8 x i32>. Both
   ; targets are expected to emit one vpmovzxwd reading the low xmm half.
   1868 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
   1869 ; ALL-LABEL: zext_16x16_to_8x32:
   1870 ; ALL:       ## BB#0:
   1871 ; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1872 ; ALL-NEXT:    retq
   1873   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
   1874   %2 = bitcast <16 x i16> %1 to <8 x i32>
   1875   ret <8 x i32> %2
   1876 }
   1877 
   ; Shuffle-based zero extension: i16 elements 0..3 of %a are each followed by
   ; three zero elements (mask index 16) and the result is bitcast to
   ; <4 x i64>. Both targets are expected to emit one vpmovzxwq reading the low
   ; xmm half.
   1878 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
   1879 ; ALL-LABEL: zext_16x16_to_4x64:
   1880 ; ALL:       ## BB#0:
   1881 ; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1882 ; ALL-NEXT:    retq
   1883   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
   1884   %2 = bitcast <16 x i16> %1 to <4 x i64>
   1885   ret <4 x i64> %2
   1886 }
   1887 
   1888 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
        ; Zero-extends elements 0..3 of %a from i32 to i64, expressed as a
        ; shuffle against a zero vector followed by a bitcast. The shared check
        ; prefix expects both llc configurations to emit a single vpmovzxdq.
   1889 ; ALL-LABEL: zext_8x32_to_4x64:
   1890 ; ALL:       ## BB#0:
   1891 ; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1892 ; ALL-NEXT:    retq
          ; Mask index 8 picks element 0 of the zeroinitializer operand, so the
          ; result alternates %a[i] with a zero dword for i = 0..3.
   1893   %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
          ; Reinterpret every pair of dwords as one i64 lane.
   1894   %2 = bitcast <8 x i32> %1 to <4 x i64>
   1895   ret <4 x i64> %2
   1896 }
   1897 
   1898 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
        ; Compares two <64 x i8> vectors for equality and zero-extends the
        ; <64 x i1> result back to <64 x i8>. The knl checks expect the work to
        ; be done on two ymm halves (vpcmpeqb, then vpand with a splat of 1); the
        ; skx checks expect one zmm compare into %k1 followed by a zero-masked
        ; vmovdqu8 load of a constant (presumably a splat of 1; the operand is
        ; matched only by a wildcard). Attribute group #0 is defined outside this
        ; excerpt.
   1899 ; KNL-LABEL: zext_64xi1_to_64xi8:
   1900 ; KNL:       ## BB#0:
   1901 ; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
   1902 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1903 ; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
   1904 ; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
   1905 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
   1906 ; KNL-NEXT:    retq
   1907 ;
   1908 ; SKX-LABEL: zext_64xi1_to_64xi8:
   1909 ; SKX:       ## BB#0:
   1910 ; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
   1911 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
   1912 ; SKX-NEXT:    retq
          ; Per-lane equality produces the i1 mask that is then widened to 0 or 1.
   1913   %mask = icmp eq <64 x i8> %x, %y
   1914   %1 = zext <64 x i1> %mask to <64 x i8>
   1915   ret <64 x i8> %1
   1916 }
   1917 
   1918 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
        ; Compares two <32 x i16> vectors for equality and zero-extends the
        ; <32 x i1> result to <32 x i16>. The knl checks expect, for each ymm
        ; half, a vpcmpeqw followed by a logical right shift by 15 that leaves
        ; 0 or 1 in every word; the skx checks expect one zmm compare into %k1
        ; followed by a zero-masked vmovdqu16 load of a constant.
   1919 ; KNL-LABEL: zext_32xi1_to_32xi16:
   1920 ; KNL:       ## BB#0:
   1921 ; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
   1922 ; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
   1923 ; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
   1924 ; KNL-NEXT:    vpsrlw $15, %ymm1, %ymm1
   1925 ; KNL-NEXT:    retq
   1926 ;
   1927 ; SKX-LABEL: zext_32xi1_to_32xi16:
   1928 ; SKX:       ## BB#0:
   1929 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
   1930 ; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
   1931 ; SKX-NEXT:    retq
          ; Per-lane equality produces the i1 mask that is then widened to 0 or 1.
   1932   %mask = icmp eq <32 x i16> %x, %y
   1933   %1 = zext <32 x i1> %mask to <32 x i16>
   1934   ret <32 x i16> %1
   1935 }
   1936 
   1937 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
        ; Compares two <16 x i16> vectors for equality and zero-extends the
        ; <16 x i1> result to <16 x i16>. The knl checks expect a ymm vpcmpeqw
        ; followed by a logical right shift by 15; the skx checks expect a ymm
        ; compare into %k1 followed by a zero-masked vmovdqu16 load of a constant.
   1938 ; KNL-LABEL: zext_16xi1_to_16xi16:
   1939 ; KNL:       ## BB#0:
   1940 ; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
   1941 ; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
   1942 ; KNL-NEXT:    retq
   1943 ;
   1944 ; SKX-LABEL: zext_16xi1_to_16xi16:
   1945 ; SKX:       ## BB#0:
   1946 ; SKX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
   1947 ; SKX-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
   1948 ; SKX-NEXT:    retq
          ; Per-lane equality produces the i1 mask that is then widened to 0 or 1.
   1949   %mask = icmp eq <16 x i16> %x, %y
   1950   %1 = zext <16 x i1> %mask to <16 x i16>
   1951   ret <16 x i16> %1
   1952 }
   1953 
   1954 
   1955 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
        ; Compares two <32 x i16> vectors for equality and zero-extends the
        ; <32 x i1> result to <32 x i8>, so the result lanes are narrower than
        ; the compared lanes. The knl checks expect each ymm half to be compared
        ; with vpcmpeqw, sign-extended to dwords (vpmovsxwd), truncated to bytes
        ; (vpmovdb), rejoined with vinsertf128 and finally ANDed with a constant
        ; via vandps. The skx checks expect one zmm compare into %k1 followed by
        ; a zero-masked ymm vmovdqu8 load of a constant.
   1956 ; KNL-LABEL: zext_32xi1_to_32xi8:
   1957 ; KNL:       ## BB#0:
   1958 ; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
   1959 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
   1960 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
   1961 ; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
   1962 ; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
   1963 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   1964 ; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1965 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
   1966 ; KNL-NEXT:    retq
   1967 ;
   1968 ; SKX-LABEL: zext_32xi1_to_32xi8:
   1969 ; SKX:       ## BB#0:
   1970 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
   1971 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
   1972 ; SKX-NEXT:    retq
          ; The compare runs on i16 lanes; only the widened mask is i8.
   1973   %mask = icmp eq <32 x i16> %x, %y
   1974   %1 = zext <32 x i1> %mask to <32 x i8>
   1975   ret <32 x i8> %1
   1976 }
   1977 
   1978 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
        ; Compares two <4 x i8> vectors for equality and zero-extends the
        ; <4 x i1> result to <4 x i32>. In both sets of checks the operands are
        ; first ANDed with a constant that keeps only the low byte of each
        ; 32-bit lane and are then compared with vpcmpeqd. The knl checks finish
        ; with a logical right shift by 31; the skx checks finish with a
        ; zero-masked vpbroadcastd of a constant under %k1.
   1979 ; KNL-LABEL: zext_4xi1_to_4x32:
   1980 ; KNL:       ## BB#0:
   1981 ; KNL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
   1982 ; KNL-NEXT:    vpand %xmm2, %xmm1, %xmm1
   1983 ; KNL-NEXT:    vpand %xmm2, %xmm0, %xmm0
   1984 ; KNL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
   1985 ; KNL-NEXT:    vpsrld $31, %xmm0, %xmm0
   1986 ; KNL-NEXT:    retq
   1987 ;
   1988 ; SKX-LABEL: zext_4xi1_to_4x32:
   1989 ; SKX:       ## BB#0:
   1990 ; SKX-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
   1991 ; SKX-NEXT:    vpandq %xmm2, %xmm1, %xmm1
   1992 ; SKX-NEXT:    vpandq %xmm2, %xmm0, %xmm0
   1993 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
   1994 ; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
   1995 ; SKX-NEXT:    retq
          ; The IR compares i8 lanes; the expected code above performs the
          ; compare on 32-bit lanes after masking each one to its low byte.
   1996   %mask = icmp eq <4 x i8> %x, %y
   1997   %1 = zext <4 x i1> %mask to <4 x i32>
   1998   ret <4 x i32> %1
   1999 }
   2000 
   2001 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
        ; Compares two <2 x i8> vectors for equality and zero-extends the
        ; <2 x i1> result to <2 x i64>. In both sets of checks the operands are
        ; first ANDed with a constant that keeps only the low byte of each
        ; 64-bit lane and are then compared with vpcmpeqq. The knl checks finish
        ; with a logical right shift by 63; the skx checks finish with a
        ; zero-masked vmovdqa64 load of a constant under %k1.
   2002 ; KNL-LABEL: zext_2xi1_to_2xi64:
   2003 ; KNL:       ## BB#0:
   2004 ; KNL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
   2005 ; KNL-NEXT:    vpand %xmm2, %xmm1, %xmm1
   2006 ; KNL-NEXT:    vpand %xmm2, %xmm0, %xmm0
   2007 ; KNL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
   2008 ; KNL-NEXT:    vpsrlq $63, %xmm0, %xmm0
   2009 ; KNL-NEXT:    retq
   2010 ;
   2011 ; SKX-LABEL: zext_2xi1_to_2xi64:
   2012 ; SKX:       ## BB#0:
   2013 ; SKX-NEXT:    vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
   2014 ; SKX-NEXT:    vpandq %xmm2, %xmm1, %xmm1
   2015 ; SKX-NEXT:    vpandq %xmm2, %xmm0, %xmm0
   2016 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1
   2017 ; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
   2018 ; SKX-NEXT:    retq
          ; The IR compares i8 lanes; the expected code above performs the
          ; compare on 64-bit lanes after masking each one to its low byte.
   2019   %mask = icmp eq <2 x i8> %x, %y
   2020   %1 = zext <2 x i1> %mask to <2 x i64>
   2021   ret <2 x i64> %1
   2022 }
   2023