Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=SKX
      4 
      ; Loads <8 x i8> from %i, zero-extends it to <8 x i16>, and zeroes every lane whose %mask bit is clear.
      ; The KNL checks expect an unmasked vpmovzxbw load followed by vpsllw/vpsraw/vpand to apply the mask;
      ; the SKX checks expect vpmovw2m into %k1 and a single zero-masked vpmovzxbw from memory.
      ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
      5 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
      6 ; KNL-LABEL: zext_8x8mem_to_8x16:
      7 ; KNL:       # %bb.0:
      8 ; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
      9 ; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
     10 ; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
     11 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
     12 ; KNL-NEXT:    retq
     13 ;
     14 ; SKX-LABEL: zext_8x8mem_to_8x16:
     15 ; SKX:       # %bb.0:
     16 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
     17 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
     18 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
     19 ; SKX-NEXT:    retq
     20   %a   = load <8 x i8>,<8 x i8> *%i,align 1
     21   %x   = zext <8 x i8> %a to <8 x i16>
     22   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
     23   ret <8 x i16> %ret
     24 }
     25 
     ; Loads <8 x i8> from %i, sign-extends it to <8 x i16>, and zeroes every lane whose %mask bit is clear.
     ; The KNL checks expect an unmasked vpmovsxbw load followed by vpsllw/vpsraw/vpand to apply the mask;
     ; the SKX checks expect vpmovw2m into %k1 and a single zero-masked vpmovsxbw from (%rdi).
     ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
     26 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
     27 ; KNL-LABEL: sext_8x8mem_to_8x16:
     28 ; KNL:       # %bb.0:
     29 ; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
     30 ; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
     31 ; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
     32 ; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
     33 ; KNL-NEXT:    retq
     34 ;
     35 ; SKX-LABEL: sext_8x8mem_to_8x16:
     36 ; SKX:       # %bb.0:
     37 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
     38 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
     39 ; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
     40 ; SKX-NEXT:    retq
     41   %a   = load <8 x i8>,<8 x i8> *%i,align 1
     42   %x   = sext <8 x i8> %a to <8 x i16>
     43   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
     44   ret <8 x i16> %ret
     45 }
     46 
     47 
     ; Loads <16 x i8> from %i, zero-extends it to <16 x i16>, and zeroes every lane whose %mask bit is clear.
     ; The KNL checks expect the mask in xmm0 to be widened with vpmovzxbw, then vpsllw/vpsraw/vpand against
     ; the extended load; the SKX checks expect vpmovb2m into %k1 and one zero-masked vpmovzxbw from memory.
     ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
     47 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
     48 ; KNL-LABEL: zext_16x8mem_to_16x16:
     49 ; KNL:       # %bb.0:
     50 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     51 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
     52 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
     53 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
     54 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
     55 ; KNL-NEXT:    retq
     56 ;
     57 ; SKX-LABEL: zext_16x8mem_to_16x16:
     58 ; SKX:       # %bb.0:
     59 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
     60 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
     61 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
     62 ; SKX-NEXT:    retq
     63   %a   = load <16 x i8>,<16 x i8> *%i,align 1
     64   %x   = zext <16 x i8> %a to <16 x i16>
     65   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
     66   ret <16 x i16> %ret
     67 }
     69 
     ; Loads <16 x i8> from %i, sign-extends it to <16 x i16>, and zeroes every lane whose %mask bit is clear.
     ; The KNL checks expect the mask in xmm0 to be widened with vpmovzxbw, then vpsllw/vpsraw/vpand against
     ; an unmasked vpmovsxbw load; the SKX checks expect vpmovb2m into %k1 and one zero-masked vpmovsxbw.
     ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
     69 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
     70 ; KNL-LABEL: sext_16x8mem_to_16x16:
     71 ; KNL:       # %bb.0:
     72 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     73 ; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
     74 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
     75 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
     76 ; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
     77 ; KNL-NEXT:    retq
     78 ;
     79 ; SKX-LABEL: sext_16x8mem_to_16x16:
     80 ; SKX:       # %bb.0:
     81 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
     82 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
     83 ; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
     84 ; SKX-NEXT:    retq
     85   %a   = load <16 x i8>,<16 x i8> *%i,align 1
     86   %x   = sext <16 x i8> %a to <16 x i16>
     87   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
     88   ret <16 x i16> %ret
     89 }
     91 
     ; Zero-extends the register argument <16 x i8> %a to <16 x i16> with no mask.
     ; Both RUN configurations share the ALL checks, which expect a single vpmovzxbw from xmm0 to ymm0.
     92 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
     93 ; ALL-LABEL: zext_16x8_to_16x16:
     94 ; ALL:       # %bb.0:
     95 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
     96 ; ALL-NEXT:    retq
     97   %x   = zext <16 x i8> %a to <16 x i16>
     98   ret <16 x i16> %x
     99 }
    100 
    ; Zero-extends the register argument <16 x i8> %a to <16 x i16> and zeroes lanes whose %mask bit is clear.
    ; The KNL checks expect both the mask (xmm1) and the data (xmm0) to be widened with vpmovzxbw, then
    ; vpsllw/vpsraw/vpand; the SKX checks expect vpmovb2m into %k1 and one zero-masked vpmovzxbw.
    101 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
    102 ; KNL-LABEL: zext_16x8_to_16x16_mask:
    103 ; KNL:       # %bb.0:
    104 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    105 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    106 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    107 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    108 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    109 ; KNL-NEXT:    retq
    110 ;
    111 ; SKX-LABEL: zext_16x8_to_16x16_mask:
    112 ; SKX:       # %bb.0:
    113 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    114 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    115 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    116 ; SKX-NEXT:    retq
    117   %x   = zext <16 x i8> %a to <16 x i16>
    118   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
    119   ret <16 x i16> %ret
    120 }
    121 
    ; Sign-extends the register argument <16 x i8> %a to <16 x i16> with no mask.
    ; Both RUN configurations share the ALL checks, which expect a single vpmovsxbw from xmm0 to ymm0.
    122 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
    123 ; ALL-LABEL: sext_16x8_to_16x16:
    124 ; ALL:       # %bb.0:
    125 ; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
    126 ; ALL-NEXT:    retq
    127   %x   = sext <16 x i8> %a to <16 x i16>
    128   ret <16 x i16> %x
    129 }
    130 
    ; Sign-extends the register argument <16 x i8> %a to <16 x i16> and zeroes lanes whose %mask bit is clear.
    ; The KNL checks expect the mask (xmm1) widened with vpmovzxbw, the data widened with vpmovsxbw, then
    ; vpsllw/vpsraw/vpand; the SKX checks expect vpmovb2m into %k1 and one zero-masked vpmovsxbw.
    131 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
    132 ; KNL-LABEL: sext_16x8_to_16x16_mask:
    133 ; KNL:       # %bb.0:
    134 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    135 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
    136 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    137 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    138 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    139 ; KNL-NEXT:    retq
    140 ;
    141 ; SKX-LABEL: sext_16x8_to_16x16_mask:
    142 ; SKX:       # %bb.0:
    143 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    144 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    145 ; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
    146 ; SKX-NEXT:    retq
    147   %x   = sext <16 x i8> %a to <16 x i16>
    148   %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
    149   ret <16 x i16> %ret
    150 }
    151 
    ; Loads <32 x i8> from %i, zero-extends it to <32 x i16>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the work to be split into two ymm halves (vextracti128 on the mask, two vpmovzxbw
    ; loads, and a vpsllw/vpsraw/vpand sequence per half); the SKX checks expect vpmovb2m into %k1 and one
    ; zero-masked vpmovzxbw into zmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    152 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
    153 ; KNL-LABEL: zext_32x8mem_to_32x16:
    154 ; KNL:       # %bb.0:
    155 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    156 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    157 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    158 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    159 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    160 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
    161 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
    162 ; KNL-NEXT:    vpand %ymm3, %ymm0, %ymm0
    163 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    164 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    165 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
    166 ; KNL-NEXT:    retq
    167 ;
    168 ; SKX-LABEL: zext_32x8mem_to_32x16:
    169 ; SKX:       # %bb.0:
    170 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
    171 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
    172 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
    173 ; SKX-NEXT:    retq
    174   %a   = load <32 x i8>,<32 x i8> *%i,align 1
    175   %x   = zext <32 x i8> %a to <32 x i16>
    176   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    177   ret <32 x i16> %ret
    178 }
    179 
    ; Loads <32 x i8> from %i, sign-extends it to <32 x i16>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the work to be split into two ymm halves (vpmovsxbw loads from (%rdi) and
    ; 16(%rdi), each combined with its widened mask half via vpsllw/vpsraw/vpand); the SKX checks expect
    ; vpmovb2m into %k1 and one zero-masked vpmovsxbw into zmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    180 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
    181 ; KNL-LABEL: sext_32x8mem_to_32x16:
    182 ; KNL:       # %bb.0:
    183 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    184 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    185 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    186 ; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm2
    187 ; KNL-NEXT:    vpmovsxbw (%rdi), %ymm3
    188 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
    189 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
    190 ; KNL-NEXT:    vpand %ymm3, %ymm0, %ymm0
    191 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    192 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    193 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
    194 ; KNL-NEXT:    retq
    195 ;
    196 ; SKX-LABEL: sext_32x8mem_to_32x16:
    197 ; SKX:       # %bb.0:
    198 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
    199 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
    200 ; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
    201 ; SKX-NEXT:    retq
    202   %a   = load <32 x i8>,<32 x i8> *%i,align 1
    203   %x   = sext <32 x i8> %a to <32 x i16>
    204   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    205   ret <32 x i16> %ret
    206 }
    207 
    ; Zero-extends the register argument <32 x i8> %a to <32 x i16> with no mask.
    ; The KNL checks expect two ymm-sized vpmovzxbw instructions (low half, then the vextracti128 high half)
    ; returned in ymm0/ymm1; the SKX checks expect a single vpmovzxbw from ymm0 to zmm0.
    208 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
    209 ; KNL-LABEL: zext_32x8_to_32x16:
    210 ; KNL:       # %bb.0:
    211 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    212 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    213 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    214 ; KNL-NEXT:    vmovdqa %ymm2, %ymm0
    215 ; KNL-NEXT:    retq
    216 ;
    217 ; SKX-LABEL: zext_32x8_to_32x16:
    218 ; SKX:       # %bb.0:
    219 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    220 ; SKX-NEXT:    retq
    221   %x   = zext <32 x i8> %a to <32 x i16>
    222   ret <32 x i16> %x
    223 }
    224 
    ; Zero-extends the register argument <32 x i8> %a to <32 x i16> and zeroes lanes whose %mask bit is clear.
    ; The KNL checks expect mask and data to be split into ymm halves with vextracti128, widened with
    ; vpmovzxbw, and combined per half with vpsllw/vpsraw/vpand; the SKX checks expect vpmovb2m into %k1
    ; and one zero-masked vpmovzxbw into zmm0.
    225 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
    226 ; KNL-LABEL: zext_32x8_to_32x16_mask:
    227 ; KNL:       # %bb.0:
    228 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
    229 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    230 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    231 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
    232 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
    233 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    234 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    235 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    236 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    237 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm1
    238 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    239 ; KNL-NEXT:    vpand %ymm3, %ymm1, %ymm1
    240 ; KNL-NEXT:    retq
    241 ;
    242 ; SKX-LABEL: zext_32x8_to_32x16_mask:
    243 ; SKX:       # %bb.0:
    244 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    245 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    246 ; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    247 ; SKX-NEXT:    retq
    248   %x   = zext <32 x i8> %a to <32 x i16>
    249   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    250   ret <32 x i16> %ret
    251 }
    252 
    ; Sign-extends the register argument <32 x i8> %a to <32 x i16> with no mask.
    ; The KNL checks expect two ymm-sized vpmovsxbw instructions (low half, then the vextracti128 high half)
    ; returned in ymm0/ymm1; the SKX checks expect a single vpmovsxbw from ymm0 to zmm0.
    253 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
    254 ; KNL-LABEL: sext_32x8_to_32x16:
    255 ; KNL:       # %bb.0:
    256 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm2
    257 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
    258 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
    259 ; KNL-NEXT:    vmovdqa %ymm2, %ymm0
    260 ; KNL-NEXT:    retq
    261 ;
    262 ; SKX-LABEL: sext_32x8_to_32x16:
    263 ; SKX:       # %bb.0:
    264 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
    265 ; SKX-NEXT:    retq
    266   %x   = sext <32 x i8> %a to <32 x i16>
    267   ret <32 x i16> %x
    268 }
    269 
    ; Sign-extends the register argument <32 x i8> %a to <32 x i16> and zeroes lanes whose %mask bit is clear.
    ; The KNL checks expect the mask halves to be widened with vpmovzxbw, the data halves with vpmovsxbw,
    ; and each pair combined with vpsllw/vpsraw/vpand; the SKX checks expect vpmovb2m into %k1 and one
    ; zero-masked vpmovsxbw into zmm0.
    270 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
    271 ; KNL-LABEL: sext_32x8_to_32x16_mask:
    272 ; KNL:       # %bb.0:
    273 ; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
    274 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    275 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
    276 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
    277 ; KNL-NEXT:    vpmovsxbw %xmm3, %ymm3
    278 ; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
    279 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
    280 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    281 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    282 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm1
    283 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
    284 ; KNL-NEXT:    vpand %ymm3, %ymm1, %ymm1
    285 ; KNL-NEXT:    retq
    286 ;
    287 ; SKX-LABEL: sext_32x8_to_32x16_mask:
    288 ; SKX:       # %bb.0:
    289 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    290 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    291 ; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
    292 ; SKX-NEXT:    retq
    293   %x   = sext <32 x i8> %a to <32 x i16>
    294   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    295   ret <32 x i16> %ret
    296 }
    297 
    ; Loads <4 x i8> from %i, zero-extends it to <4 x i32>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be formed with a zmm-wide vptestmd and applied by a separate
    ; zero-masked vmovdqa32 after an unmasked vpmovzxbd; the SKX checks expect vpmovd2m into %k1 and one
    ; zero-masked vpmovzxbd on xmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    298 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
    299 ; KNL-LABEL: zext_4x8mem_to_4x32:
    300 ; KNL:       # %bb.0:
    301 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    302 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    303 ; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    304 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    305 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    306 ; KNL-NEXT:    vzeroupper
    307 ; KNL-NEXT:    retq
    308 ;
    309 ; SKX-LABEL: zext_4x8mem_to_4x32:
    310 ; SKX:       # %bb.0:
    311 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    312 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    313 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    314 ; SKX-NEXT:    retq
    315   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    316   %x   = zext <4 x i8> %a to <4 x i32>
    317   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    318   ret <4 x i32> %ret
    319 }
    320 
    ; Loads <4 x i8> from %i, sign-extends it to <4 x i32>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be formed with a zmm-wide vptestmd and applied by a separate
    ; zero-masked vmovdqa32 after an unmasked vpmovsxbd; the SKX checks expect vpmovd2m into %k1 and one
    ; zero-masked vpmovsxbd on xmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    321 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
    322 ; KNL-LABEL: sext_4x8mem_to_4x32:
    323 ; KNL:       # %bb.0:
    324 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    325 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    326 ; KNL-NEXT:    vpmovsxbd (%rdi), %xmm0
    327 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    328 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    329 ; KNL-NEXT:    vzeroupper
    330 ; KNL-NEXT:    retq
    331 ;
    332 ; SKX-LABEL: sext_4x8mem_to_4x32:
    333 ; SKX:       # %bb.0:
    334 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    335 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    336 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
    337 ; SKX-NEXT:    retq
    338   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    339   %x   = sext <4 x i8> %a to <4 x i32>
    340   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    341   ret <4 x i32> %ret
    342 }
    343 
    ; Loads <8 x i8> from %i, zero-extends it to <8 x i32>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be widened to zmm with vpmovsxwq, tested with vptestmq, and applied
    ; by a zero-masked vmovdqa32 after an unmasked vpmovzxbd; the SKX checks expect vpmovw2m into %k1 and
    ; one zero-masked vpmovzxbd on ymm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    344 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
    345 ; KNL-LABEL: zext_8x8mem_to_8x32:
    346 ; KNL:       # %bb.0:
    347 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    348 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    349 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    350 ; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    351 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    352 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    353 ; KNL-NEXT:    retq
    354 ;
    355 ; SKX-LABEL: zext_8x8mem_to_8x32:
    356 ; SKX:       # %bb.0:
    357 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    358 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    359 ; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    360 ; SKX-NEXT:    retq
    361   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    362   %x   = zext <8 x i8> %a to <8 x i32>
    363   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    364   ret <8 x i32> %ret
    365 }
    366 
    ; Loads <8 x i8> from %i, sign-extends it to <8 x i32>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be widened to zmm with vpmovsxwq, tested with vptestmq, and applied
    ; by a zero-masked vmovdqa32 after an unmasked vpmovsxbd; the SKX checks expect vpmovw2m into %k1 and
    ; one zero-masked vpmovsxbd on ymm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    367 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
    368 ; KNL-LABEL: sext_8x8mem_to_8x32:
    369 ; KNL:       # %bb.0:
    370 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    371 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    372 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    373 ; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
    374 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    375 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    376 ; KNL-NEXT:    retq
    377 ;
    378 ; SKX-LABEL: sext_8x8mem_to_8x32:
    379 ; SKX:       # %bb.0:
    380 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    381 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    382 ; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
    383 ; SKX-NEXT:    retq
    384   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    385   %x   = sext <8 x i8> %a to <8 x i32>
    386   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    387   ret <8 x i32> %ret
    388 }
    389 
    ; Loads <16 x i8> from %i, zero-extends it to <16 x i32>, and zeroes every lane whose %mask bit is clear.
    ; Both runs are expected to end in one zero-masked vpmovzxbd into zmm0; they differ only in how %k1 is
    ; built (vpmovsxbd/vpslld/vptestmd in the KNL checks, vpsllw/vpmovb2m in the SKX checks).
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    390 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
    391 ; KNL-LABEL: zext_16x8mem_to_16x32:
    392 ; KNL:       # %bb.0:
    393 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    394 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    395 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    396 ; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
    397 ; KNL-NEXT:    retq
    398 ;
    399 ; SKX-LABEL: zext_16x8mem_to_16x32:
    400 ; SKX:       # %bb.0:
    401 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    402 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    403 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
    404 ; SKX-NEXT:    retq
    405   %a   = load <16 x i8>,<16 x i8> *%i,align 1
    406   %x   = zext <16 x i8> %a to <16 x i32>
    407   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    408   ret <16 x i32> %ret
    409 }
    410 
    ; Loads <16 x i8> from %i, sign-extends it to <16 x i32>, and zeroes every lane whose %mask bit is clear.
    ; Both runs are expected to end in one zero-masked vpmovsxbd from (%rdi) into zmm0; they differ only in
    ; how %k1 is built (vpmovsxbd/vpslld/vptestmd in the KNL checks, vpsllw/vpmovb2m in the SKX checks).
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    411 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
    412 ; KNL-LABEL: sext_16x8mem_to_16x32:
    413 ; KNL:       # %bb.0:
    414 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    415 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    416 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    417 ; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
    418 ; KNL-NEXT:    retq
    419 ;
    420 ; SKX-LABEL: sext_16x8mem_to_16x32:
    421 ; SKX:       # %bb.0:
    422 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    423 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    424 ; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
    425 ; SKX-NEXT:    retq
    426   %a   = load <16 x i8>,<16 x i8> *%i,align 1
    427   %x   = sext <16 x i8> %a to <16 x i32>
    428   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    429   ret <16 x i32> %ret
    430 }
    431 
    ; Zero-extends the register argument <16 x i8> %a to <16 x i32> and zeroes lanes whose %mask bit is clear.
    ; Both runs are expected to end in one zero-masked vpmovzxbd from xmm0 into zmm0; they differ only in
    ; how %k1 is built from xmm1 (vpmovsxbd/vpslld/vptestmd for KNL, vpsllw/vpmovb2m for SKX).
    432 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
    433 ; KNL-LABEL: zext_16x8_to_16x32_mask:
    434 ; KNL:       # %bb.0:
    435 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    436 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    437 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    438 ; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    439 ; KNL-NEXT:    retq
    440 ;
    441 ; SKX-LABEL: zext_16x8_to_16x32_mask:
    442 ; SKX:       # %bb.0:
    443 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    444 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    445 ; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    446 ; SKX-NEXT:    retq
    447   %x   = zext <16 x i8> %a to <16 x i32>
    448   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    449   ret <16 x i32> %ret
    450 }
    451 
    ; Sign-extends the register argument <16 x i8> %a to <16 x i32> and zeroes lanes whose %mask bit is clear.
    ; Both runs are expected to end in one zero-masked vpmovsxbd from xmm0 into zmm0; they differ only in
    ; how %k1 is built from xmm1 (vpmovsxbd/vpslld/vptestmd for KNL, vpsllw/vpmovb2m for SKX).
    452 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
    453 ; KNL-LABEL: sext_16x8_to_16x32_mask:
    454 ; KNL:       # %bb.0:
    455 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    456 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    457 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    458 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
    459 ; KNL-NEXT:    retq
    460 ;
    461 ; SKX-LABEL: sext_16x8_to_16x32_mask:
    462 ; SKX:       # %bb.0:
    463 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    464 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    465 ; SKX-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z}
    466 ; SKX-NEXT:    retq
    467   %x   = sext <16 x i8> %a to <16 x i32>
    468   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    469   ret <16 x i32> %ret
    470 }
    471 
    ; Zero-extends the register argument <16 x i8> %i to <16 x i32> with no mask.
    ; Both RUN configurations share the ALL checks, which expect a single vpmovzxbd from xmm0 to zmm0.
    472 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
    473 ; ALL-LABEL: zext_16x8_to_16x32:
    474 ; ALL:       # %bb.0:
    475 ; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    476 ; ALL-NEXT:    retq
    477   %x = zext <16 x i8> %i to <16 x i32>
    478   ret <16 x i32> %x
    479 }
    480 
    ; Sign-extends the register argument <16 x i8> %i to <16 x i32> with no mask.
    ; Both RUN configurations share the ALL checks, which expect a single vpmovsxbd from xmm0 to zmm0.
    481 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
    482 ; ALL-LABEL: sext_16x8_to_16x32:
    483 ; ALL:       # %bb.0:
    484 ; ALL-NEXT:    vpmovsxbd %xmm0, %zmm0
    485 ; ALL-NEXT:    retq
    486   %x = sext <16 x i8> %i to <16 x i32>
    487   ret <16 x i32> %x
    488 }
    489 
    ; Loads <2 x i8> from %i, zero-extends it to <2 x i64>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be formed with a zmm-wide vptestmq and applied by a separate
    ; zero-masked vmovdqa64 after an unmasked vpmovzxbq; the SKX checks expect vpmovq2m into %k1 and one
    ; zero-masked vpmovzxbq on xmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    490 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
    491 ; KNL-LABEL: zext_2x8mem_to_2x64:
    492 ; KNL:       # %bb.0:
    493 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    494 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    495 ; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    496 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    497 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    498 ; KNL-NEXT:    vzeroupper
    499 ; KNL-NEXT:    retq
    500 ;
    501 ; SKX-LABEL: zext_2x8mem_to_2x64:
    502 ; SKX:       # %bb.0:
    503 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    504 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
    505 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
    506 ; SKX-NEXT:    retq
    507   %a   = load <2 x i8>,<2 x i8> *%i,align 1
    508   %x   = zext <2 x i8> %a to <2 x i64>
    509   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    510   ret <2 x i64> %ret
    511 }
    ; Loads <2 x i8> from %i, sign-extends it to <2 x i64>, and zeroes every lane whose %mask bit is clear.
    ; The KNL checks expect the mask to be formed with a zmm-wide vptestmq and applied by a separate
    ; zero-masked vmovdqa64 after an unmasked vpmovsxbq; the SKX checks expect vpmovq2m into %k1 and one
    ; zero-masked vpmovsxbq on xmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    512 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
    513 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
    514 ; KNL:       # %bb.0:
    515 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    516 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    517 ; KNL-NEXT:    vpmovsxbq (%rdi), %xmm0
    518 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    519 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    520 ; KNL-NEXT:    vzeroupper
    521 ; KNL-NEXT:    retq
    522 ;
    523 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
    524 ; SKX:       # %bb.0:
    525 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    526 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
    527 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm0 {%k1} {z}
    528 ; SKX-NEXT:    retq
    529   %a   = load <2 x i8>,<2 x i8> *%i,align 1
    530   %x   = sext <2 x i8> %a to <2 x i64>
    531   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    532   ret <2 x i64> %ret
    533 }
    ; Loads <2 x i8> from %i and sign-extends it to <2 x i64> with no mask.
    ; Both RUN configurations share the ALL checks, which expect a single vpmovsxbq from (%rdi) into xmm0.
    ; NOTE(review): declared readnone although the body loads through %i; confirm the attribute is intended.
    534 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
    535 ; ALL-LABEL: sext_2x8mem_to_2x64:
    536 ; ALL:       # %bb.0:
    537 ; ALL-NEXT:    vpmovsxbq (%rdi), %xmm0
    538 ; ALL-NEXT:    retq
    539   %a   = load <2 x i8>,<2 x i8> *%i,align 1
    540   %x   = sext <2 x i8> %a to <2 x i64>
    541   ret <2 x i64> %x
    542 }
    543 
    ; Zero-extends a loaded <4 x i8> to <4 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovzxbq; the KNL
    ; checks expect an unmasked vpmovzxbq followed by a zero-masked vmovdqa64 on zmm0.
    544 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
    545 ; KNL-LABEL: zext_4x8mem_to_4x64:
    546 ; KNL:       # %bb.0:
    547 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    548 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    549 ; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    550 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    551 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    552 ; KNL-NEXT:    retq
    553 ;
    554 ; SKX-LABEL: zext_4x8mem_to_4x64:
    555 ; SKX:       # %bb.0:
    556 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    557 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    558 ; SKX-NEXT:    vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
    559 ; SKX-NEXT:    retq
    560   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    561   %x   = zext <4 x i8> %a to <4 x i64>
    562   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    563   ret <4 x i64> %ret
    564 }
    565 
    ; Sign-extends a loaded <4 x i8> to <4 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovsxbq; the KNL
    ; checks expect an unmasked vpmovsxbq followed by a zero-masked vmovdqa64 on zmm0.
    566 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
    567 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
    568 ; KNL:       # %bb.0:
    569 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    570 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    571 ; KNL-NEXT:    vpmovsxbq (%rdi), %ymm0
    572 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    573 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    574 ; KNL-NEXT:    retq
    575 ;
    576 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
    577 ; SKX:       # %bb.0:
    578 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    579 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    580 ; SKX-NEXT:    vpmovsxbq (%rdi), %ymm0 {%k1} {z}
    581 ; SKX-NEXT:    retq
    582   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    583   %x   = sext <4 x i8> %a to <4 x i64>
    584   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    585   ret <4 x i64> %ret
    586 }
    587 
    ; Unmasked sign-extension of a loaded <4 x i8> to <4 x i64>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxbq reading from (%rdi) into ymm0.
    588 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
    589 ; ALL-LABEL: sext_4x8mem_to_4x64:
    590 ; ALL:       # %bb.0:
    591 ; ALL-NEXT:    vpmovsxbq (%rdi), %ymm0
    592 ; ALL-NEXT:    retq
    593   %a   = load <4 x i8>,<4 x i8> *%i,align 1
    594   %x   = sext <4 x i8> %a to <4 x i64>
    595   ret <4 x i64> %x
    596 }
    597 
    ; Zero-extends a loaded <8 x i8> to <8 x i64> and zeroes lanes whose %mask bit is clear.
    ; Both check prefixes expect a single zero-masked vpmovzxbq writing zmm0; they differ
    ; only in the instructions that build %k1 from %mask.
    598 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
    599 ; KNL-LABEL: zext_8x8mem_to_8x64:
    600 ; KNL:       # %bb.0:
    601 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    602 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    603 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    604 ; KNL-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
    605 ; KNL-NEXT:    retq
    606 ;
    607 ; SKX-LABEL: zext_8x8mem_to_8x64:
    608 ; SKX:       # %bb.0:
    609 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    610 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    611 ; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
    612 ; SKX-NEXT:    retq
    613   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    614   %x   = zext <8 x i8> %a to <8 x i64>
    615   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
    616   ret <8 x i64> %ret
    617 }
    618 
    ; Sign-extends a loaded <8 x i8> to <8 x i64> and zeroes lanes whose %mask bit is clear.
    ; Both check prefixes expect a single zero-masked vpmovsxbq writing zmm0; they differ
    ; only in the instructions that build %k1 from %mask.
    619 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
    620 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
    621 ; KNL:       # %bb.0:
    622 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    623 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    624 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    625 ; KNL-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
    626 ; KNL-NEXT:    retq
    627 ;
    628 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
    629 ; SKX:       # %bb.0:
    630 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    631 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    632 ; SKX-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z}
    633 ; SKX-NEXT:    retq
    634   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    635   %x   = sext <8 x i8> %a to <8 x i64>
    636   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
    637   ret <8 x i64> %ret
    638 }
    639 
    ; Unmasked sign-extension of a loaded <8 x i8> to <8 x i64>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxbq reading from (%rdi) into zmm0.
    640 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
    641 ; ALL-LABEL: sext_8x8mem_to_8x64:
    642 ; ALL:       # %bb.0:
    643 ; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
    644 ; ALL-NEXT:    retq
    645   %a   = load <8 x i8>,<8 x i8> *%i,align 1
    646   %x   = sext <8 x i8> %a to <8 x i64>
    647   ret <8 x i64> %x
    648 }
    649 
    ; Zero-extends a loaded <4 x i16> to <4 x i32> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovzxwd; the KNL
    ; checks expect an unmasked vpmovzxwd followed by a zero-masked vmovdqa32 on zmm0.
    650 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
    651 ; KNL-LABEL: zext_4x16mem_to_4x32:
    652 ; KNL:       # %bb.0:
    653 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    654 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    655 ; KNL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    656 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    657 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    658 ; KNL-NEXT:    vzeroupper
    659 ; KNL-NEXT:    retq
    660 ;
    661 ; SKX-LABEL: zext_4x16mem_to_4x32:
    662 ; SKX:       # %bb.0:
    663 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    664 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    665 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
    666 ; SKX-NEXT:    retq
    667   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    668   %x   = zext <4 x i16> %a to <4 x i32>
    669   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    670   ret <4 x i32> %ret
    671 }
    672 
    ; Sign-extends a loaded <4 x i16> to <4 x i32> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovsxwd; the KNL
    ; checks expect an unmasked vpmovsxwd followed by a zero-masked vmovdqa32 on zmm0.
    673 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
    674 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
    675 ; KNL:       # %bb.0:
    676 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    677 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    678 ; KNL-NEXT:    vpmovsxwd (%rdi), %xmm0
    679 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    680 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    681 ; KNL-NEXT:    vzeroupper
    682 ; KNL-NEXT:    retq
    683 ;
    684 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
    685 ; SKX:       # %bb.0:
    686 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    687 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    688 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm0 {%k1} {z}
    689 ; SKX-NEXT:    retq
    690   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    691   %x   = sext <4 x i16> %a to <4 x i32>
    692   %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
    693   ret <4 x i32> %ret
    694 }
    695 
    ; Unmasked sign-extension of a loaded <4 x i16> to <4 x i32>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxwd reading from (%rdi) into xmm0.
    696 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
    697 ; ALL-LABEL: sext_4x16mem_to_4x32:
    698 ; ALL:       # %bb.0:
    699 ; ALL-NEXT:    vpmovsxwd (%rdi), %xmm0
    700 ; ALL-NEXT:    retq
    701   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    702   %x   = sext <4 x i16> %a to <4 x i32>
    703   ret <4 x i32> %x
    704 }
    705 
    706 
    ; Zero-extends a loaded <8 x i16> to <8 x i32> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovzxwd; the KNL
    ; checks expect an unmasked vpmovzxwd followed by a zero-masked vmovdqa32 on zmm0.
    707 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
    708 ; KNL-LABEL: zext_8x16mem_to_8x32:
    709 ; KNL:       # %bb.0:
    710 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    711 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    712 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    713 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    714 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    715 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    716 ; KNL-NEXT:    retq
    717 ;
    718 ; SKX-LABEL: zext_8x16mem_to_8x32:
    719 ; SKX:       # %bb.0:
    720 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    721 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    722 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
    723 ; SKX-NEXT:    retq
    724   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    725   %x   = zext <8 x i16> %a to <8 x i32>
    726   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    727   ret <8 x i32> %ret
    728 }
    729 
    ; Sign-extends a loaded <8 x i16> to <8 x i32> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovsxwd; the KNL
    ; checks expect an unmasked vpmovsxwd followed by a zero-masked vmovdqa32 on zmm0.
    730 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
    731 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
    732 ; KNL:       # %bb.0:
    733 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    734 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    735 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    736 ; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
    737 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    738 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    739 ; KNL-NEXT:    retq
    740 ;
    741 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
    742 ; SKX:       # %bb.0:
    743 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    744 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    745 ; SKX-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z}
    746 ; SKX-NEXT:    retq
    747   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    748   %x   = sext <8 x i16> %a to <8 x i32>
    749   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    750   ret <8 x i32> %ret
    751 }
    752 
    ; Unmasked sign-extension of a loaded <8 x i16> to <8 x i32>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxwd reading from (%rdi) into ymm0.
    753 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
    754 ; ALL-LABEL: sext_8x16mem_to_8x32:
    755 ; ALL:       # %bb.0:
    756 ; ALL-NEXT:    vpmovsxwd (%rdi), %ymm0
    757 ; ALL-NEXT:    retq
    758   %a   = load <8 x i16>,<8 x i16> *%i,align 1
    759   %x   = sext <8 x i16> %a to <8 x i32>
    760   ret <8 x i32> %x
    761 }
    762 
    ; Zero-extends the <8 x i16> argument %a to <8 x i32> and zeroes lanes whose %mask bit
    ; is clear. The SKX checks expect the extension and zero-masking folded into one
    ; vpmovzxwd; the KNL checks expect an unmasked vpmovzxwd followed by a zero-masked
    ; vmovdqa32 on zmm0.
    763 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
    764 ; KNL-LABEL: zext_8x16_to_8x32mask:
    765 ; KNL:       # %bb.0:
    766 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
    767 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
    768 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
    769 ; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    770 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    771 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    772 ; KNL-NEXT:    retq
    773 ;
    774 ; SKX-LABEL: zext_8x16_to_8x32mask:
    775 ; SKX:       # %bb.0:
    776 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
    777 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
    778 ; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    779 ; SKX-NEXT:    retq
    780   %x   = zext <8 x i16> %a to <8 x i32>
    781   %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
    782   ret <8 x i32> %ret
    783 }
    784 
    ; Unmasked zero-extension of the <8 x i16> argument %a to <8 x i32>. Both RUN lines
    ; share the ALL checks, which expect a single vpmovzxwd from xmm0 into ymm0.
    785 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
    786 ; ALL-LABEL: zext_8x16_to_8x32:
    787 ; ALL:       # %bb.0:
    788 ; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    789 ; ALL-NEXT:    retq
    790   %x   = zext <8 x i16> %a to <8 x i32>
    791   ret <8 x i32> %x
    792 }
    793 
    ; Zero-extends a loaded <16 x i16> to <16 x i32> and zeroes lanes whose %mask bit is
    ; clear. Both check prefixes expect a single zero-masked vpmovzxwd writing zmm0; they
    ; differ only in the instructions that build %k1 from %mask.
    794 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
    795 ; KNL-LABEL: zext_16x16mem_to_16x32:
    796 ; KNL:       # %bb.0:
    797 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    798 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    799 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    800 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    801 ; KNL-NEXT:    retq
    802 ;
    803 ; SKX-LABEL: zext_16x16mem_to_16x32:
    804 ; SKX:       # %bb.0:
    805 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    806 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    807 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    808 ; SKX-NEXT:    retq
    809   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    810   %x   = zext <16 x i16> %a to <16 x i32>
    811   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    812   ret <16 x i32> %ret
    813 }
    814 
    ; Sign-extends a loaded <16 x i16> to <16 x i32> and zeroes lanes whose %mask bit is
    ; clear. Both check prefixes expect a single zero-masked vpmovsxwd writing zmm0; they
    ; differ only in the instructions that build %k1 from %mask.
    815 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
    816 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
    817 ; KNL:       # %bb.0:
    818 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    819 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    820 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    821 ; KNL-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
    822 ; KNL-NEXT:    retq
    823 ;
    824 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
    825 ; SKX:       # %bb.0:
    826 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    827 ; SKX-NEXT:    vpmovb2m %xmm0, %k1
    828 ; SKX-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z}
    829 ; SKX-NEXT:    retq
    830   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    831   %x   = sext <16 x i16> %a to <16 x i32>
    832   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    833   ret <16 x i32> %ret
    834 }
    835 
    ; Unmasked sign-extension of a loaded <16 x i16> to <16 x i32>. Both RUN lines share
    ; the ALL checks, which expect a single vpmovsxwd reading from (%rdi) into zmm0.
    836 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
    837 ; ALL-LABEL: sext_16x16mem_to_16x32:
    838 ; ALL:       # %bb.0:
    839 ; ALL-NEXT:    vpmovsxwd (%rdi), %zmm0
    840 ; ALL-NEXT:    retq
    841   %a   = load <16 x i16>,<16 x i16> *%i,align 1
    842   %x   = sext <16 x i16> %a to <16 x i32>
    843   ret <16 x i32> %x
    844 }
    ; Zero-extends the <16 x i16> argument %a to <16 x i32> and zeroes lanes whose %mask
    ; bit is clear. Both check prefixes expect a single zero-masked vpmovzxwd from ymm0
    ; into zmm0; they differ only in the instructions that build %k1 from %mask.
    845 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
    846 ; KNL-LABEL: zext_16x16_to_16x32mask:
    847 ; KNL:       # %bb.0:
    848 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    849 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    850 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
    851 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    852 ; KNL-NEXT:    retq
    853 ;
    854 ; SKX-LABEL: zext_16x16_to_16x32mask:
    855 ; SKX:       # %bb.0:
    856 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
    857 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
    858 ; SKX-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    859 ; SKX-NEXT:    retq
    860   %x   = zext <16 x i16> %a to <16 x i32>
    861   %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
    862   ret <16 x i32> %ret
    863 }
    864 
    ; Unmasked zero-extension of the <16 x i16> argument %a to <16 x i32>. Both RUN lines
    ; share the ALL checks, which expect a single vpmovzxwd from ymm0 into zmm0.
    865 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
    866 ; ALL-LABEL: zext_16x16_to_16x32:
    867 ; ALL:       # %bb.0:
    868 ; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    869 ; ALL-NEXT:    retq
    870   %x   = zext <16 x i16> %a to <16 x i32>
    871   ret <16 x i32> %x
    872 }
    873 
    ; Zero-extends a loaded <2 x i16> to <2 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovzxwq; the KNL
    ; checks expect an unmasked vpmovzxwq followed by a zero-masked vmovdqa64 on zmm0.
    874 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
    875 ; KNL-LABEL: zext_2x16mem_to_2x64:
    876 ; KNL:       # %bb.0:
    877 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    878 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    879 ; KNL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    880 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    881 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    882 ; KNL-NEXT:    vzeroupper
    883 ; KNL-NEXT:    retq
    884 ;
    885 ; SKX-LABEL: zext_2x16mem_to_2x64:
    886 ; SKX:       # %bb.0:
    887 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    888 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
    889 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
    890 ; SKX-NEXT:    retq
    891   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    892   %x   = zext <2 x i16> %a to <2 x i64>
    893   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    894   ret <2 x i64> %ret
    895 }
    896 
    ; Sign-extends a loaded <2 x i16> to <2 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovsxwq; the KNL
    ; checks expect an unmasked vpmovsxwq followed by a zero-masked vmovdqa64 on zmm0.
    897 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
    898 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
    899 ; KNL:       # %bb.0:
    900 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    901 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    902 ; KNL-NEXT:    vpmovsxwq (%rdi), %xmm0
    903 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    904 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    905 ; KNL-NEXT:    vzeroupper
    906 ; KNL-NEXT:    retq
    907 ;
    908 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
    909 ; SKX:       # %bb.0:
    910 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    911 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
    912 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm0 {%k1} {z}
    913 ; SKX-NEXT:    retq
    914   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    915   %x   = sext <2 x i16> %a to <2 x i64>
    916   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
    917   ret <2 x i64> %ret
    918 }
    919 
    ; Unmasked sign-extension of a loaded <2 x i16> to <2 x i64>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxwq reading from (%rdi) into xmm0.
    920 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
    921 ; ALL-LABEL: sext_2x16mem_to_2x64:
    922 ; ALL:       # %bb.0:
    923 ; ALL-NEXT:    vpmovsxwq (%rdi), %xmm0
    924 ; ALL-NEXT:    retq
    925   %a   = load <2 x i16>,<2 x i16> *%i,align 1
    926   %x   = sext <2 x i16> %a to <2 x i64>
    927   ret <2 x i64> %x
    928 }
    929 
    ; Zero-extends a loaded <4 x i16> to <4 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovzxwq; the KNL
    ; checks expect an unmasked vpmovzxwq followed by a zero-masked vmovdqa64 on zmm0.
    930 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
    931 ; KNL-LABEL: zext_4x16mem_to_4x64:
    932 ; KNL:       # %bb.0:
    933 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    934 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    935 ; KNL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    936 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    937 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    938 ; KNL-NEXT:    retq
    939 ;
    940 ; SKX-LABEL: zext_4x16mem_to_4x64:
    941 ; SKX:       # %bb.0:
    942 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    943 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    944 ; SKX-NEXT:    vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
    945 ; SKX-NEXT:    retq
    946   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    947   %x   = zext <4 x i16> %a to <4 x i64>
    948   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    949   ret <4 x i64> %ret
    950 }
    951 
    ; Sign-extends a loaded <4 x i16> to <4 x i64> and zeroes lanes whose %mask bit is clear.
    ; The SKX checks expect the load and zero-masking folded into one vpmovsxwq; the KNL
    ; checks expect an unmasked vpmovsxwq followed by a zero-masked vmovdqa64 on zmm0.
    952 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
    953 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
    954 ; KNL:       # %bb.0:
    955 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    956 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    957 ; KNL-NEXT:    vpmovsxwq (%rdi), %ymm0
    958 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    959 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    960 ; KNL-NEXT:    retq
    961 ;
    962 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
    963 ; SKX:       # %bb.0:
    964 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    965 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
    966 ; SKX-NEXT:    vpmovsxwq (%rdi), %ymm0 {%k1} {z}
    967 ; SKX-NEXT:    retq
    968   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    969   %x   = sext <4 x i16> %a to <4 x i64>
    970   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
    971   ret <4 x i64> %ret
    972 }
    973 
    ; Unmasked sign-extension of a loaded <4 x i16> to <4 x i64>. Both RUN lines share the
    ; ALL checks, which expect a single vpmovsxwq reading from (%rdi) into ymm0.
    974 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
    975 ; ALL-LABEL: sext_4x16mem_to_4x64:
    976 ; ALL:       # %bb.0:
    977 ; ALL-NEXT:    vpmovsxwq (%rdi), %ymm0
    978 ; ALL-NEXT:    retq
    979   %a   = load <4 x i16>,<4 x i16> *%i,align 1
    980   %x   = sext <4 x i16> %a to <4 x i64>
    981   ret <4 x i64> %x
    982 }
    983 
    ; Zero-extends a loaded <8 x i16> to <8 x i64> and zeroes lanes whose %mask bit is clear.
    ; Both check prefixes expect a single zero-masked vpmovzxwq writing zmm0; they differ
    ; only in the instructions that build %k1 from %mask.
    984 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
    985 ; KNL-LABEL: zext_8x16mem_to_8x64:
    986 ; KNL:       # %bb.0:
    987 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    988 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    989 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
    990 ; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    991 ; KNL-NEXT:    retq
    992 ;
    993 ; SKX-LABEL: zext_8x16mem_to_8x64:
    994 ; SKX:       # %bb.0:
    995 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    996 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
    997 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
    998 ; SKX-NEXT:    retq
    999   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   1000   %x   = zext <8 x i16> %a to <8 x i64>
   1001   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1002   ret <8 x i64> %ret
   1003 }
   1004 
   ; Sign-extends a loaded <8 x i16> to <8 x i64> and zeroes lanes whose %mask bit is clear.
   ; Both check prefixes expect a single zero-masked vpmovsxwq writing zmm0; they differ
   ; only in the instructions that build %k1 from %mask.
   1005 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
   1006 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
   1007 ; KNL:       # %bb.0:
   1008 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1009 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1010 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1011 ; KNL-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
   1012 ; KNL-NEXT:    retq
   1013 ;
   1014 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
   1015 ; SKX:       # %bb.0:
   1016 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1017 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1018 ; SKX-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z}
   1019 ; SKX-NEXT:    retq
   1020   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   1021   %x   = sext <8 x i16> %a to <8 x i64>
   1022   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1023   ret <8 x i64> %ret
   1024 }
   1025 
   ; Unmasked sign-extension of a loaded <8 x i16> to <8 x i64>. Both RUN lines share the
   ; ALL checks, which expect a single vpmovsxwq reading from (%rdi) into zmm0.
   1026 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
   1027 ; ALL-LABEL: sext_8x16mem_to_8x64:
   1028 ; ALL:       # %bb.0:
   1029 ; ALL-NEXT:    vpmovsxwq (%rdi), %zmm0
   1030 ; ALL-NEXT:    retq
   1031   %a   = load <8 x i16>,<8 x i16> *%i,align 1
   1032   %x   = sext <8 x i16> %a to <8 x i64>
   1033   ret <8 x i64> %x
   1034 }
   1035 
   ; Zero-extends the <8 x i16> argument %a to <8 x i64> and zeroes lanes whose %mask bit
   ; is clear. Both check prefixes expect a single zero-masked vpmovzxwq from xmm0 into
   ; zmm0; they differ only in the instructions that build %k1 from %mask.
   1036 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
   1037 ; KNL-LABEL: zext_8x16_to_8x64mask:
   1038 ; KNL:       # %bb.0:
   1039 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
   1040 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
   1041 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
   1042 ; KNL-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1043 ; KNL-NEXT:    retq
   1044 ;
   1045 ; SKX-LABEL: zext_8x16_to_8x64mask:
   1046 ; SKX:       # %bb.0:
   1047 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
   1048 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
   1049 ; SKX-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1050 ; SKX-NEXT:    retq
   1051   %x   = zext <8 x i16> %a to <8 x i64>
   1052   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1053   ret <8 x i64> %ret
   1054 }
   1055 
   ; Unmasked zero-extension of the <8 x i16> argument %a to <8 x i64>. Both RUN lines
   ; share the ALL checks, which expect a single vpmovzxwq from xmm0 into zmm0.
   1056 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
   1057 ; ALL-LABEL: zext_8x16_to_8x64:
   1058 ; ALL:       # %bb.0:
   1059 ; ALL-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1060 ; ALL-NEXT:    retq
   1061   %ret   = zext <8 x i16> %a to <8 x i64>
   1062   ret <8 x i64> %ret
   1063 }
   1064 
   ; Zero-extends a loaded <2 x i32> to <2 x i64> and zeroes lanes whose %mask bit is clear.
   ; The SKX checks expect the load and zero-masking folded into one vpmovzxdq; the KNL
   ; checks expect an unmasked vpmovzxdq followed by a zero-masked vmovdqa64 on zmm0.
   1065 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
   1066 ; KNL-LABEL: zext_2x32mem_to_2x64:
   1067 ; KNL:       # %bb.0:
   1068 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
   1069 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1070 ; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
   1071 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1072 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
   1073 ; KNL-NEXT:    vzeroupper
   1074 ; KNL-NEXT:    retq
   1075 ;
   1076 ; SKX-LABEL: zext_2x32mem_to_2x64:
   1077 ; SKX:       # %bb.0:
   1078 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
   1079 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
   1080 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
   1081 ; SKX-NEXT:    retq
   1082   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1083   %x   = zext <2 x i32> %a to <2 x i64>
   1084   %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   1085   ret <2 x i64> %ret
   1086 }
   1087 
   ; Sign-extends a loaded <2 x i32> to <2 x i64> and zeroes lanes whose %mask bit is clear.
   ; The SKX checks expect the load and zero-masking folded into one vpmovsxdq; the KNL
   ; checks expect an unmasked vpmovsxdq followed by a zero-masked vmovdqa64 on zmm0.
   1088 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
   1089 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
   1090 ; KNL:       # %bb.0:
   1091 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
   1092 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1093 ; KNL-NEXT:    vpmovsxdq (%rdi), %xmm0
   1094 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1095 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
   1096 ; KNL-NEXT:    vzeroupper
   1097 ; KNL-NEXT:    retq
   1098 ;
   1099 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
   1100 ; SKX:       # %bb.0:
   1101 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
   1102 ; SKX-NEXT:    vpmovq2m %xmm0, %k1
   1103 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm0 {%k1} {z}
   1104 ; SKX-NEXT:    retq
   1105   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1106   %x   = sext <2 x i32> %a to <2 x i64>
   1107   %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
   1108   ret <2 x i64> %ret
   1109 }
   1110 
   ; Unmasked sign-extension of a loaded <2 x i32> to <2 x i64>. Both RUN lines share the
   ; ALL checks, which expect a single vpmovsxdq reading from (%rdi) into xmm0.
   1111 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
   1112 ; ALL-LABEL: sext_2x32mem_to_2x64:
   1113 ; ALL:       # %bb.0:
   1114 ; ALL-NEXT:    vpmovsxdq (%rdi), %xmm0
   1115 ; ALL-NEXT:    retq
   1116   %a   = load <2 x i32>,<2 x i32> *%i,align 1
   1117   %x   = sext <2 x i32> %a to <2 x i64>
   1118   ret <2 x i64> %x
   1119 }
   1120 
   ; Zero-extends a loaded <4 x i32> to <4 x i64> and zeroes lanes whose %mask bit is clear.
   ; The SKX checks expect the load and zero-masking folded into one vpmovzxdq; the KNL
   ; checks expect an unmasked vpmovzxdq followed by a zero-masked vmovdqa64 on zmm0.
   1121 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
   1122 ; KNL-LABEL: zext_4x32mem_to_4x64:
   1123 ; KNL:       # %bb.0:
   1124 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1125 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   1126 ; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1127 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1128 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
   1129 ; KNL-NEXT:    retq
   1130 ;
   1131 ; SKX-LABEL: zext_4x32mem_to_4x64:
   1132 ; SKX:       # %bb.0:
   1133 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1134 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
   1135 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
   1136 ; SKX-NEXT:    retq
   1137   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1138   %x   = zext <4 x i32> %a to <4 x i64>
   1139   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1140   ret <4 x i64> %ret
   1141 }
   1142 
   ; Sign-extends a loaded <4 x i32> to <4 x i64> and zeroes lanes whose %mask bit is clear.
   ; The SKX checks expect the load and zero-masking folded into one vpmovsxdq; the KNL
   ; checks expect an unmasked vpmovsxdq followed by a zero-masked vmovdqa64 on zmm0.
   1143 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
   1144 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
   1145 ; KNL:       # %bb.0:
   1146 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1147 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   1148 ; KNL-NEXT:    vpmovsxdq (%rdi), %ymm0
   1149 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1150 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
   1151 ; KNL-NEXT:    retq
   1152 ;
   1153 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
   1154 ; SKX:       # %bb.0:
   1155 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1156 ; SKX-NEXT:    vpmovd2m %xmm0, %k1
   1157 ; SKX-NEXT:    vpmovsxdq (%rdi), %ymm0 {%k1} {z}
   1158 ; SKX-NEXT:    retq
   1159   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1160   %x   = sext <4 x i32> %a to <4 x i64>
   1161   %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1162   ret <4 x i64> %ret
   1163 }
   1164 
   ; Unmasked sign-extension of a loaded <4 x i32> to <4 x i64>. Both RUN lines share the
   ; ALL checks, which expect a single vpmovsxdq reading from (%rdi) into ymm0.
   1165 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
   1166 ; ALL-LABEL: sext_4x32mem_to_4x64:
   1167 ; ALL:       # %bb.0:
   1168 ; ALL-NEXT:    vpmovsxdq (%rdi), %ymm0
   1169 ; ALL-NEXT:    retq
   1170   %a   = load <4 x i32>,<4 x i32> *%i,align 1
   1171   %x   = sext <4 x i32> %a to <4 x i64>
   1172   ret <4 x i64> %x
   1173 }
   1174 
   ; Unmasked sign-extension of the <4 x i32> argument %a to <4 x i64>. Both RUN lines
   ; share the ALL checks, which expect a single vpmovsxdq from xmm0 into ymm0.
   1175 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
   1176 ; ALL-LABEL: sext_4x32_to_4x64:
   1177 ; ALL:       # %bb.0:
   1178 ; ALL-NEXT:    vpmovsxdq %xmm0, %ymm0
   1179 ; ALL-NEXT:    retq
   1180   %x   = sext <4 x i32> %a to <4 x i64>
   1181   ret <4 x i64> %x
   1182 }
   1183 
   ; Zero-extends the <4 x i32> argument %a to <4 x i64> and zeroes lanes whose %mask bit
   ; is clear. The SKX checks expect the extension and zero-masking folded into one
   ; vpmovzxdq; the KNL checks expect an unmasked vpmovzxdq followed by a zero-masked
   ; vmovdqa64 on zmm0.
   1184 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
   1185 ; KNL-LABEL: zext_4x32_to_4x64mask:
   1186 ; KNL:       # %bb.0:
   1187 ; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
   1188 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
   1189 ; KNL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1190 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1191 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
   1192 ; KNL-NEXT:    retq
   1193 ;
   1194 ; SKX-LABEL: zext_4x32_to_4x64mask:
   1195 ; SKX:       # %bb.0:
   1196 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
   1197 ; SKX-NEXT:    vpmovd2m %xmm1, %k1
   1198 ; SKX-NEXT:    vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1199 ; SKX-NEXT:    retq
   1200   %x   = zext <4 x i32> %a to <4 x i64>
   1201   %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
   1202   ret <4 x i64> %ret
   1203 }
   1204 
   ; Zero-extends a loaded <8 x i32> to <8 x i64> and zeroes lanes whose %mask bit is clear.
   ; Both check prefixes expect a single zero-masked vpmovzxdq writing zmm0; they differ
   ; only in the instructions that build %k1 from %mask.
   1205 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
   1206 ; KNL-LABEL: zext_8x32mem_to_8x64:
   1207 ; KNL:       # %bb.0:
   1208 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1209 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1210 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1211 ; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
   1212 ; KNL-NEXT:    retq
   1213 ;
   1214 ; SKX-LABEL: zext_8x32mem_to_8x64:
   1215 ; SKX:       # %bb.0:
   1216 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1217 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1218 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
   1219 ; SKX-NEXT:    retq
   1220   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1221   %x   = zext <8 x i32> %a to <8 x i64>
   1222   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1223   ret <8 x i64> %ret
   1224 }
   1225 
   ; Sign-extends a <8 x i32> loaded from memory (align 1) to <8 x i64> and
   ; selects between the result and zero under a <8 x i1> mask.
   ; Both configurations are expected to end in a zero-masked vpmovsxdq with a
   ; memory operand; only the mask materialisation into %k1 differs.
   1226 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
   1227 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
   1228 ; KNL:       # %bb.0:
   1229 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1230 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1231 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
   1232 ; KNL-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
   1233 ; KNL-NEXT:    retq
   1234 ;
   1235 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
   1236 ; SKX:       # %bb.0:
   1237 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1238 ; SKX-NEXT:    vpmovw2m %xmm0, %k1
   1239 ; SKX-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z}
   1240 ; SKX-NEXT:    retq
   1241   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1242   %x   = sext <8 x i32> %a to <8 x i64>
   1243   %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1244   ret <8 x i64> %ret
   1245 }
   1246 
   ; Unmasked sign extension of a <8 x i32> loaded from memory (align 1) to
   ; <8 x i64>. Shared checks expect a single vpmovsxdq from memory into zmm0.
   1247 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
   1248 ; ALL-LABEL: sext_8x32mem_to_8x64:
   1249 ; ALL:       # %bb.0:
   1250 ; ALL-NEXT:    vpmovsxdq (%rdi), %zmm0
   1251 ; ALL-NEXT:    retq
   1252   %a   = load <8 x i32>,<8 x i32> *%i,align 1
   1253   %x   = sext <8 x i32> %a to <8 x i64>
   1254   ret <8 x i64> %x
   1255 }
   1256 
   ; Unmasked sign extension of a <8 x i32> register argument to <8 x i64>.
   ; Shared checks expect a single vpmovsxdq from ymm0 to zmm0.
   1257 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
   1258 ; ALL-LABEL: sext_8x32_to_8x64:
   1259 ; ALL:       # %bb.0:
   1260 ; ALL-NEXT:    vpmovsxdq %ymm0, %zmm0
   1261 ; ALL-NEXT:    retq
   1262   %x   = sext <8 x i32> %a to <8 x i64>
   1263   ret <8 x i64> %x
   1264 }
   1265 
   ; Zero-extends a <8 x i32> register argument to <8 x i64> and selects between
   ; the result and zero under a <8 x i1> mask.
   ; Both configurations are expected to end in a zero-masked vpmovzxdq from
   ; ymm0 to zmm0; only the mask materialisation into %k1 differs.
   1266 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
   1267 ; KNL-LABEL: zext_8x32_to_8x64mask:
   1268 ; KNL:       # %bb.0:
   1269 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
   1270 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
   1271 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
   1272 ; KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   1273 ; KNL-NEXT:    retq
   1274 ;
   1275 ; SKX-LABEL: zext_8x32_to_8x64mask:
   1276 ; SKX:       # %bb.0:
   1277 ; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
   1278 ; SKX-NEXT:    vpmovw2m %xmm1, %k1
   1279 ; SKX-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   1280 ; SKX-NEXT:    retq
   1281   %x   = zext <8 x i32> %a to <8 x i64>
   1282   %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
   1283   ret <8 x i64> %ret
   1284 }
   ; Floating-point truncation of <8 x double> to <8 x float>.
   ; Shared checks expect a single vcvtpd2ps from zmm0 to ymm0.
   1285 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
   1286 ; ALL-LABEL: fptrunc_test:
   1287 ; ALL:       # %bb.0:
   1288 ; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
   1289 ; ALL-NEXT:    retq
   1290   %b = fptrunc <8 x double> %a to <8 x float>
   1291   ret <8 x float> %b
   1292 }
   1293 
   ; Floating-point extension of <8 x float> to <8 x double>.
   ; Shared checks expect a single vcvtps2pd from ymm0 to zmm0.
   1294 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
   1295 ; ALL-LABEL: fpext_test:
   1296 ; ALL:       # %bb.0:
   1297 ; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
   1298 ; ALL-NEXT:    retq
   1299   %b = fpext <8 x float> %a to <8 x double>
   1300   ret <8 x double> %b
   1301 }
   1302 
   ; Reinterprets an i16 as <16 x i1> and zero-extends each bit to an i32 lane.
   ; The KNL checks expect kmovw, a zero-masked vpternlogd $255 and vpsrld $31.
   ; The SKX checks expect kmovd, vpmovm2d and vpsrld $31.
   1303 define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
   1304 ; KNL-LABEL: zext_16i1_to_16xi32:
   1305 ; KNL:       # %bb.0:
   1306 ; KNL-NEXT:    kmovw %edi, %k1
   1307 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   1308 ; KNL-NEXT:    vpsrld $31, %zmm0, %zmm0
   1309 ; KNL-NEXT:    retq
   1310 ;
   1311 ; SKX-LABEL: zext_16i1_to_16xi32:
   1312 ; SKX:       # %bb.0:
   1313 ; SKX-NEXT:    kmovd %edi, %k0
   1314 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
   1315 ; SKX-NEXT:    vpsrld $31, %zmm0, %zmm0
   1316 ; SKX-NEXT:    retq
   1317   %a = bitcast i16 %b to <16 x i1>
   1318   %c = zext <16 x i1> %a to <16 x i32>
   1319   ret <16 x i32> %c
   1320 }
   1321 
   ; Reinterprets an i8 as <8 x i1> and zero-extends each bit to an i64 lane.
   ; The KNL checks expect kmovw, a zero-masked vpternlogq $255 and vpsrlq $63.
   ; The SKX checks expect kmovd, vpmovm2q and vpsrlq $63.
   1322 define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
   1323 ; KNL-LABEL: zext_8i1_to_8xi64:
   1324 ; KNL:       # %bb.0:
   1325 ; KNL-NEXT:    kmovw %edi, %k1
   1326 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   1327 ; KNL-NEXT:    vpsrlq $63, %zmm0, %zmm0
   1328 ; KNL-NEXT:    retq
   1329 ;
   1330 ; SKX-LABEL: zext_8i1_to_8xi64:
   1331 ; SKX:       # %bb.0:
   1332 ; SKX-NEXT:    kmovd %edi, %k0
   1333 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
   1334 ; SKX-NEXT:    vpsrlq $63, %zmm0, %zmm0
   1335 ; SKX-NEXT:    retq
   1336   %a = bitcast i8 %b to <8 x i1>
   1337   %c = zext <8 x i1> %a to <8 x i64>
   1338   ret <8 x i64> %c
   1339 }
   1340 
   ; Truncates <16 x i8> to <16 x i1> and returns the bits packed into an i16.
   ; The KNL checks expect the bytes to be widened to dwords (vpmovsxbd) before
   ; vpslld $31 / vptestmd; the SKX checks expect vpsllw $7 / vpmovb2m on xmm.
   1341 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
   1342 ; KNL-LABEL: trunc_16i8_to_16i1:
   1343 ; KNL:       # %bb.0:
   1344 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1345 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1346 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1347 ; KNL-NEXT:    kmovw %k0, %eax
   1348 ; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
   1349 ; KNL-NEXT:    vzeroupper
   1350 ; KNL-NEXT:    retq
   1351 ;
   1352 ; SKX-LABEL: trunc_16i8_to_16i1:
   1353 ; SKX:       # %bb.0:
   1354 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
   1355 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
   1356 ; SKX-NEXT:    kmovd %k0, %eax
   1357 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   1358 ; SKX-NEXT:    retq
   1359   %mask_b = trunc <16 x i8>%a to <16 x i1>
   1360   %mask = bitcast <16 x i1> %mask_b to i16
   1361   ret i16 %mask
   1362 }
   1363 
   ; Truncates <16 x i32> to <16 x i1> and returns the bits packed into an i16.
   ; Both configurations are expected to shift bit 0 into the sign position with
   ; vpslld $31; the KNL checks then use vptestmd, the SKX checks vpmovd2m.
   1364 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
   1365 ; KNL-LABEL: trunc_16i32_to_16i1:
   1366 ; KNL:       # %bb.0:
   1367 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1368 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1369 ; KNL-NEXT:    kmovw %k0, %eax
   1370 ; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
   1371 ; KNL-NEXT:    vzeroupper
   1372 ; KNL-NEXT:    retq
   1373 ;
   1374 ; SKX-LABEL: trunc_16i32_to_16i1:
   1375 ; SKX:       # %bb.0:
   1376 ; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
   1377 ; SKX-NEXT:    vpmovd2m %zmm0, %k0
   1378 ; SKX-NEXT:    kmovd %k0, %eax
   1379 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   1380 ; SKX-NEXT:    vzeroupper
   1381 ; SKX-NEXT:    retq
   1382   %mask_b = trunc <16 x i32>%a to <16 x i1>
   1383   %mask = bitcast <16 x i1> %mask_b to i16
   1384   ret i16 %mask
   1385 }
   1386 
   ; Truncates two <4 x i32> arguments to <4 x i1>, ANDs the two masks and
   ; sign-extends the result back to <4 x i32>.
   ; Shared checks expect the whole sequence to stay in xmm registers:
   ; vpand, vpslld $31, vpsrad $31 (no mask-register instructions).
   1387 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
   1388 ; ALL-LABEL: trunc_4i32_to_4i1:
   1389 ; ALL:       # %bb.0:
   1390 ; ALL-NEXT:    vpand %xmm1, %xmm0, %xmm0
   1391 ; ALL-NEXT:    vpslld $31, %xmm0, %xmm0
   1392 ; ALL-NEXT:    vpsrad $31, %xmm0, %xmm0
   1393 ; ALL-NEXT:    retq
   1394   %mask_a = trunc <4 x i32>%a to <4 x i1>
   1395   %mask_b = trunc <4 x i32>%b to <4 x i1>
   1396   %a_and_b = and <4 x i1>%mask_a, %mask_b
   1397   %res = sext <4 x i1>%a_and_b to <4 x i32>
   1398   ret <4 x i32>%res
   1399 }
   1400 
   1401 
   ; Truncates <8 x i16> to <8 x i1> and returns the bits packed into an i8.
   ; The KNL checks expect the words to be widened to qwords (vpmovsxwq) before
   ; vpsllq $63 / vptestmq; the SKX checks expect vpsllw $15 / vpmovw2m on xmm.
   1402 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
   1403 ; KNL-LABEL: trunc_8i16_to_8i1:
   1404 ; KNL:       # %bb.0:
   1405 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1406 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1407 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   1408 ; KNL-NEXT:    kmovw %k0, %eax
   1409 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
   1410 ; KNL-NEXT:    vzeroupper
   1411 ; KNL-NEXT:    retq
   1412 ;
   1413 ; SKX-LABEL: trunc_8i16_to_8i1:
   1414 ; SKX:       # %bb.0:
   1415 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1416 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   1417 ; SKX-NEXT:    kmovd %k0, %eax
   1418 ; SKX-NEXT:    # kill: def $al killed $al killed $eax
   1419 ; SKX-NEXT:    retq
   1420   %mask_b = trunc <8 x i16>%a to <8 x i1>
   1421   %mask = bitcast <8 x i1> %mask_b to i8
   1422   ret i8 %mask
   1423 }
   1424 
   ; Compares two <8 x i32> vectors with signed less-than, inverts the <8 x i1>
   ; result by XOR-ing with all-true, and sign-extends it to <8 x i32>.
   ; Both configurations are expected to emit vpcmpgtd followed by
   ; vpternlogq $15 (on zmm in the KNL checks, on ymm in the SKX checks).
   ; NOTE(review): imm8 $15 presumably encodes the bitwise NOT that implements
   ; the XOR with all-true - confirm against the instruction reference.
   1425 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1426 ; KNL-LABEL: sext_8i1_8i32:
   1427 ; KNL:       # %bb.0:
   1428 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1429 ; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
   1430 ; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
   1431 ; KNL-NEXT:    retq
   1432 ;
   1433 ; SKX-LABEL: sext_8i1_8i32:
   1434 ; SKX:       # %bb.0:
   1435 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1436 ; SKX-NEXT:    vpternlogq $15, %ymm0, %ymm0, %ymm0
   1437 ; SKX-NEXT:    retq
   1438   %x = icmp slt <8 x i32> %a1, %a2
   1439   %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
   1440   %y = sext <8 x i1> %x1 to <8 x i32>
   1441   ret <8 x i32> %y
   1442 }
   1443 
   1444 
   ; Truncates a scalar i32 to i1, inserts it as element 0 of a constant
   ; <16 x i1> vector (all true except element 1), and returns the vector
   ; bitcast to i16.
   ; Both configurations are expected to load the constant $-4 into a mask
   ; register, shift right then left by one, and OR in (%edi & 1).
   1445 define i16 @trunc_i32_to_i1(i32 %a) {
   1446 ; KNL-LABEL: trunc_i32_to_i1:
   1447 ; KNL:       # %bb.0:
   1448 ; KNL-NEXT:    movw $-4, %ax
   1449 ; KNL-NEXT:    kmovw %eax, %k0
   1450 ; KNL-NEXT:    kshiftrw $1, %k0, %k0
   1451 ; KNL-NEXT:    kshiftlw $1, %k0, %k0
   1452 ; KNL-NEXT:    andl $1, %edi
   1453 ; KNL-NEXT:    kmovw %edi, %k1
   1454 ; KNL-NEXT:    korw %k1, %k0, %k0
   1455 ; KNL-NEXT:    kmovw %k0, %eax
   1456 ; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
   1457 ; KNL-NEXT:    retq
   1458 ;
   1459 ; SKX-LABEL: trunc_i32_to_i1:
   1460 ; SKX:       # %bb.0:
   1461 ; SKX-NEXT:    movw $-4, %ax
   1462 ; SKX-NEXT:    kmovd %eax, %k0
   1463 ; SKX-NEXT:    kshiftrw $1, %k0, %k0
   1464 ; SKX-NEXT:    kshiftlw $1, %k0, %k0
   1465 ; SKX-NEXT:    andl $1, %edi
   1466 ; SKX-NEXT:    kmovw %edi, %k1
   1467 ; SKX-NEXT:    korw %k1, %k0, %k0
   1468 ; SKX-NEXT:    kmovd %k0, %eax
   1469 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   1470 ; SKX-NEXT:    retq
   1471   %a_i = trunc i32 %a to i1
   1472   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
   1473   %res = bitcast <16 x i1> %maskv to i16
   1474   ret i16 %res
   1475 }
   1476 
   ; Compares two <8 x i32> vectors with signed less-than and sign-extends the
   ; <8 x i1> result to <8 x i16>.
   ; The KNL checks expect a vector compare followed by vpmovdw on zmm; the SKX
   ; checks expect a compare into %k0 followed by vpmovm2w.
   1477 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1478 ; KNL-LABEL: sext_8i1_8i16:
   1479 ; KNL:       # %bb.0:
   1480 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1481 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
   1482 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
   1483 ; KNL-NEXT:    vzeroupper
   1484 ; KNL-NEXT:    retq
   1485 ;
   1486 ; SKX-LABEL: sext_8i1_8i16:
   1487 ; SKX:       # %bb.0:
   1488 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
   1489 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
   1490 ; SKX-NEXT:    vzeroupper
   1491 ; SKX-NEXT:    retq
   1492   %x = icmp slt <8 x i32> %a1, %a2
   1493   %y = sext <8 x i1> %x to <8 x i16>
   1494   ret <8 x i16> %y
   1495 }
   1496 
   ; Compares two <16 x i32> vectors with signed less-than and sign-extends the
   ; <16 x i1> result to <16 x i32>.
   ; The KNL checks expect a compare into %k1 and a zero-masked vpternlogd $255;
   ; the SKX checks expect a compare into %k0 followed by vpmovm2d.
   1497 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
   1498 ; KNL-LABEL: sext_16i1_16i32:
   1499 ; KNL:       # %bb.0:
   1500 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
   1501 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   1502 ; KNL-NEXT:    retq
   1503 ;
   1504 ; SKX-LABEL: sext_16i1_16i32:
   1505 ; SKX:       # %bb.0:
   1506 ; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
   1507 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
   1508 ; SKX-NEXT:    retq
   1509   %x = icmp slt <16 x i32> %a1, %a2
   1510   %y = sext <16 x i1> %x to <16 x i32>
   1511   ret <16 x i32> %y
   1512 }
   1513 
   ; Compares two <8 x i32> vectors with signed less-than and sign-extends the
   ; <8 x i1> result to <8 x i64>.
   ; The KNL checks expect a ymm vector compare widened with vpmovsxdq; the SKX
   ; checks expect a compare into %k0 followed by vpmovm2q.
   1514 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   1515 ; KNL-LABEL: sext_8i1_8i64:
   1516 ; KNL:       # %bb.0:
   1517 ; KNL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
   1518 ; KNL-NEXT:    vpmovsxdq %ymm0, %zmm0
   1519 ; KNL-NEXT:    retq
   1520 ;
   1521 ; SKX-LABEL: sext_8i1_8i64:
   1522 ; SKX:       # %bb.0:
   1523 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
   1524 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
   1525 ; SKX-NEXT:    retq
   1526   %x = icmp slt <8 x i32> %a1, %a2
   1527   %y = sext <8 x i1> %x to <8 x i64>
   1528   ret <8 x i64> %y
   1529 }
   1530 
   ; Loads <8 x i8> from %a, sign-extends it to <8 x i64> and stores the result
   ; to %res. Shared checks expect the load and extension to fold into one
   ; vpmovsxbq with a memory operand, followed by a vmovdqa64 store.
   1531 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
   1532 ; ALL-LABEL: extload_v8i64:
   1533 ; ALL:       # %bb.0:
   1534 ; ALL-NEXT:    vpmovsxbq (%rdi), %zmm0
   1535 ; ALL-NEXT:    vmovdqa64 %zmm0, (%rsi)
   1536 ; ALL-NEXT:    vzeroupper
   1537 ; ALL-NEXT:    retq
   1538   %sign_load = load <8 x i8>, <8 x i8>* %a
   1539   %c = sext <8 x i8> %sign_load to <8 x i64>
   1540   store <8 x i64> %c, <8 x i64>* %res
   1541   ret void
   1542 }
   1543 
   ; Selects between a <64 x i16> value and zero under a <64 x i1> mask.
   ; The KNL checks expect the work to be split into four ymm pieces, each
   ; widening its mask bytes (vpmovzxbw), turning bit 0 into an all-ones/zero
   ; word (vpsllw/vpsraw $15) and AND-ing with the data.
   ; The SKX checks expect vpmovb2m into %k1 and two zero-masked vmovdqu16
   ; moves, with kshiftrq $32 selecting the upper half of the mask.
   1544 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
   1545 ; KNL-LABEL: test21:
   1546 ; KNL:       # %bb.0:
   1547 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
   1548 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
   1549 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
   1550 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
   1551 ; KNL-NEXT:    vpsllw $15, %ymm4, %ymm4
   1552 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1553 ; KNL-NEXT:    vpand %ymm0, %ymm4, %ymm0
   1554 ; KNL-NEXT:    vpsllw $15, %ymm5, %ymm4
   1555 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1556 ; KNL-NEXT:    vpand %ymm1, %ymm4, %ymm1
   1557 ; KNL-NEXT:    vpsllw $15, %ymm6, %ymm4
   1558 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1559 ; KNL-NEXT:    vpand %ymm2, %ymm4, %ymm2
   1560 ; KNL-NEXT:    vpsllw $15, %ymm7, %ymm4
   1561 ; KNL-NEXT:    vpsraw $15, %ymm4, %ymm4
   1562 ; KNL-NEXT:    vpand %ymm3, %ymm4, %ymm3
   1563 ; KNL-NEXT:    retq
   1564 ;
   1565 ; SKX-LABEL: test21:
   1566 ; SKX:       # %bb.0:
   1567 ; SKX-NEXT:    vpsllw $7, %zmm2, %zmm2
   1568 ; SKX-NEXT:    vpmovb2m %zmm2, %k1
   1569 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
   1570 ; SKX-NEXT:    kshiftrq $32, %k1, %k1
   1571 ; SKX-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
   1572 ; SKX-NEXT:    retq
   1573   %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
   1574   ret <64 x i16> %ret
   1575 }
   1576 
   ; Zero extension of <16 x i8> to <16 x i16> written as a shufflevector that
   ; interleaves each byte of %a with a zero byte (index 16 selects element 0 of
   ; the zeroinitializer operand), followed by a bitcast.
   ; Shared checks expect the shuffle to be matched as a single vpmovzxbw.
   1577 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
   1578 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
   1579 ; ALL:       # %bb.0:
   1580 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1581 ; ALL-NEXT:    retq
   1582   %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
   1583   %2 = bitcast <32 x i8> %1 to <16 x i16>
   1584   ret <16 x i16> %2
   1585 }
   1586 
   ; Same interleave-with-zero shuffle as shuffle_zext_16x8_to_16x16 (bytes of
   ; %a alternated with zero bytes, then bitcast to <16 x i16>), followed by a
   ; select against zero under a <16 x i1> mask.
   ; The KNL checks expect vector-only code (vpmovzxbw of mask and data,
   ; vpsllw/vpsraw $15, vpand); the SKX checks expect vpmovb2m and a
   ; zero-masked vpmovzxbw.
   1587 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
   1588 ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
   1589 ; KNL:       # %bb.0:
   1590 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
   1591 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1592 ; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
   1593 ; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
   1594 ; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
   1595 ; KNL-NEXT:    retq
   1596 ;
   1597 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
   1598 ; SKX:       # %bb.0:
   1599 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
   1600 ; SKX-NEXT:    vpmovb2m %xmm1, %k1
   1601 ; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1602 ; SKX-NEXT:    retq
   1603   %x   = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
   1604   %bc  = bitcast <32 x i8> %x to <16 x i16>
   1605   %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
   1606   ret <16 x i16> %ret
   1607 }
   1608 
   ; Takes the low 16 bytes of a <32 x i8> argument, interleaves each with a
   ; zero byte (index 32 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <16 x i16>.
   ; Shared checks expect a single vpmovzxbw from xmm0 to ymm0.
   1609 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
   1610 ; ALL-LABEL: zext_32x8_to_16x16:
   1611 ; ALL:       # %bb.0:
   1612 ; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   1613 ; ALL-NEXT:    retq
   1614   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
   1615   %2 = bitcast <32 x i8> %1 to <16 x i16>
   1616   ret <16 x i16> %2
   1617 }
   1618 
   ; Takes the low 8 bytes of a <32 x i8> argument, follows each with three zero
   ; bytes (index 32 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <8 x i32>.
   ; Shared checks expect a single vpmovzxbd from xmm0 to ymm0.
   1619 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
   1620 ; ALL-LABEL: zext_32x8_to_8x32:
   1621 ; ALL:       # %bb.0:
   1622 ; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   1623 ; ALL-NEXT:    retq
   1624   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
   1625   %2 = bitcast <32 x i8> %1 to <8 x i32>
   1626   ret <8 x i32> %2
   1627 }
   1628 
   ; Takes the low 4 bytes of a <32 x i8> argument, follows each with seven zero
   ; bytes (index 32 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <4 x i64>.
   ; Shared checks expect a single vpmovzxbq from xmm0 to ymm0.
   1629 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
   1630 ; ALL-LABEL: zext_32x8_to_4x64:
   1631 ; ALL:       # %bb.0:
   1632 ; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   1633 ; ALL-NEXT:    retq
   1634   %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
   1635   %2 = bitcast <32 x i8> %1 to <4 x i64>
   1636   ret <4 x i64> %2
   1637 }
   1638 
   ; Takes the low 8 words of a <16 x i16> argument, interleaves each with a
   ; zero word (index 16 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <8 x i32>.
   ; Shared checks expect a single vpmovzxwd from xmm0 to ymm0.
   1639 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
   1640 ; ALL-LABEL: zext_16x16_to_8x32:
   1641 ; ALL:       # %bb.0:
   1642 ; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1643 ; ALL-NEXT:    retq
   1644   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
   1645   %2 = bitcast <16 x i16> %1 to <8 x i32>
   1646   ret <8 x i32> %2
   1647 }
   1648 
   ; Takes the low 4 words of a <16 x i16> argument, follows each with three
   ; zero words (index 16 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <4 x i64>.
   ; Shared checks expect a single vpmovzxwq from xmm0 to ymm0.
   1649 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
   1650 ; ALL-LABEL: zext_16x16_to_4x64:
   1651 ; ALL:       # %bb.0:
   1652 ; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1653 ; ALL-NEXT:    retq
   1654   %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
   1655   %2 = bitcast <16 x i16> %1 to <4 x i64>
   1656   ret <4 x i64> %2
   1657 }
   1658 
   ; Takes the low 4 dwords of a <8 x i32> argument, interleaves each with a
   ; zero dword (index 8 selects element 0 of the zeroinitializer operand) and
   ; bitcasts the result to <4 x i64>.
   ; Shared checks expect a single vpmovzxdq from xmm0 to ymm0.
   1659 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
   1660 ; ALL-LABEL: zext_8x32_to_4x64:
   1661 ; ALL:       # %bb.0:
   1662 ; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1663 ; ALL-NEXT:    retq
   1664   %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
   1665   %2 = bitcast <8 x i32> %1 to <4 x i64>
   1666   ret <4 x i64> %2
   1667 }
   1668 
   ; Compares two <64 x i8> vectors for equality and zero-extends the <64 x i1>
   ; result to <64 x i8>.
   ; The KNL checks expect two ymm halves, each vpcmpeqb followed by vpand with
   ; a vector of byte 1s; the SKX checks expect a compare into %k1 and a
   ; zero-masked vmovdqu8 load of a rip-relative constant.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1669 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
   1670 ; KNL-LABEL: zext_64xi1_to_64xi8:
   1671 ; KNL:       # %bb.0:
   1672 ; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
   1673 ; KNL-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   1674 ; KNL-NEXT:    vpand %ymm2, %ymm0, %ymm0
   1675 ; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
   1676 ; KNL-NEXT:    vpand %ymm2, %ymm1, %ymm1
   1677 ; KNL-NEXT:    retq
   1678 ;
   1679 ; SKX-LABEL: zext_64xi1_to_64xi8:
   1680 ; SKX:       # %bb.0:
   1681 ; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
   1682 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
   1683 ; SKX-NEXT:    retq
   1684   %mask = icmp eq <64 x i8> %x, %y
   1685   %1 = zext <64 x i1> %mask to <64 x i8>
   1686   ret <64 x i8> %1
   1687 }
   1688 
   ; Compares two <32 x i16> vectors for equality and zero-extends the
   ; <32 x i1> result to <32 x i16>.
   ; The KNL checks expect two ymm halves, each vpcmpeqw followed by
   ; vpsrlw $15; the SKX checks expect a compare into %k0, vpmovm2w and
   ; vpsrlw $15 on zmm.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1689 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
   1690 ; KNL-LABEL: zext_32xi1_to_32xi16:
   1691 ; KNL:       # %bb.0:
   1692 ; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
   1693 ; KNL-NEXT:    vpsrlw $15, %ymm0, %ymm0
   1694 ; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
   1695 ; KNL-NEXT:    vpsrlw $15, %ymm1, %ymm1
   1696 ; KNL-NEXT:    retq
   1697 ;
   1698 ; SKX-LABEL: zext_32xi1_to_32xi16:
   1699 ; SKX:       # %bb.0:
   1700 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
   1701 ; SKX-NEXT:    vpmovm2w %k0, %zmm0
   1702 ; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0
   1703 ; SKX-NEXT:    retq
   1704   %mask = icmp eq <32 x i16> %x, %y
   1705   %1 = zext <32 x i1> %mask to <32 x i16>
   1706   ret <32 x i16> %1
   1707 }
   1708 
   ; Compares two <16 x i16> vectors for equality and zero-extends the
   ; <16 x i1> result to <16 x i16>.
   ; Shared checks expect vector-only code: vpcmpeqw then vpsrlw $15 on ymm.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1709 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
   1710 ; ALL-LABEL: zext_16xi1_to_16xi16:
   1711 ; ALL:       # %bb.0:
   1712 ; ALL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
   1713 ; ALL-NEXT:    vpsrlw $15, %ymm0, %ymm0
   1714 ; ALL-NEXT:    retq
   1715   %mask = icmp eq <16 x i16> %x, %y
   1716   %1 = zext <16 x i1> %mask to <16 x i16>
   1717   ret <16 x i16> %1
   1718 }
   1719 
   1720 
   ; Compares two <32 x i16> vectors for equality and zero-extends the
   ; <32 x i1> result to the narrower <32 x i8>.
   ; The KNL checks expect each ymm half to be compared, widened (vpmovsxwd)
   ; and narrowed to bytes (vpmovdb), then joined with vinserti128 and AND-ed
   ; with a rip-relative constant; the SKX checks expect a compare into %k1 and
   ; a zero-masked vmovdqu8 load of a rip-relative constant.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1721 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
   1722 ; KNL-LABEL: zext_32xi1_to_32xi8:
   1723 ; KNL:       # %bb.0:
   1724 ; KNL-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
   1725 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
   1726 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
   1727 ; KNL-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
   1728 ; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
   1729 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   1730 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1731 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
   1732 ; KNL-NEXT:    retq
   1733 ;
   1734 ; SKX-LABEL: zext_32xi1_to_32xi8:
   1735 ; SKX:       # %bb.0:
   1736 ; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
   1737 ; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
   1738 ; SKX-NEXT:    retq
   1739   %mask = icmp eq <32 x i16> %x, %y
   1740   %1 = zext <32 x i1> %mask to <32 x i8>
   1741   ret <32 x i8> %1
   1742 }
   1743 
   ; Compares two <4 x i8> vectors for equality and zero-extends the <4 x i1>
   ; result to <4 x i32>.
   ; Shared checks expect both operands to be AND-ed with a constant that keeps
   ; only the low byte of each dword, then vpcmpeqd and vpsrld $31.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1744 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
   1745 ; ALL-LABEL: zext_4xi1_to_4x32:
   1746 ; ALL:       # %bb.0:
   1747 ; ALL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
   1748 ; ALL-NEXT:    vpand %xmm2, %xmm1, %xmm1
   1749 ; ALL-NEXT:    vpand %xmm2, %xmm0, %xmm0
   1750 ; ALL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
   1751 ; ALL-NEXT:    vpsrld $31, %xmm0, %xmm0
   1752 ; ALL-NEXT:    retq
   1753   %mask = icmp eq <4 x i8> %x, %y
   1754   %1 = zext <4 x i1> %mask to <4 x i32>
   1755   ret <4 x i32> %1
   1756 }
   1757 
   ; Compares two <2 x i8> vectors for equality and zero-extends the <2 x i1>
   ; result to <2 x i64>.
   ; Shared checks expect both operands to be AND-ed with a constant that keeps
   ; only the low byte of each qword, then vpcmpeqq and vpsrlq $63.
   ; Attribute group #0 is referenced here; its definition is outside this view.
   1758 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
   1759 ; ALL-LABEL: zext_2xi1_to_2xi64:
   1760 ; ALL:       # %bb.0:
   1761 ; ALL-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
   1762 ; ALL-NEXT:    vpand %xmm2, %xmm1, %xmm1
   1763 ; ALL-NEXT:    vpand %xmm2, %xmm0, %xmm0
   1764 ; ALL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
   1765 ; ALL-NEXT:    vpsrlq $63, %xmm0, %xmm0
   1766 ; ALL-NEXT:    retq
   1767   %mask = icmp eq <2 x i8> %x, %y
   1768   %1 = zext <2 x i1> %mask to <2 x i64>
   1769   ret <2 x i64> %1
   1770 }
   1771