; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

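; Each test bitcasts an iN scalar mask to <N x i1> and zero-extends it to a
; wider element type. On pre-AVX512 targets this is lowered by broadcasting
; the scalar, ANDing with a per-lane power-of-two constant, comparing for
; equality, and shifting right so every lane holds 0 or 1; AVX512 targets
; move the mask straight into a k-register instead.
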
;
; 128-bit vectors
;

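; The i2 case still needs 64-bit lane compares. SSE2 has no pcmpeqq (an
; SSE4.1 instruction), so the 64-bit equality compare is emulated with
; pcmpeqd followed by a lane swap (pshufd) and an AND of the two halves.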
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

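; For word elements, AVX512BW can expand the mask with vpmovm2w (all-ones
; per set bit) and shift; AVX512F without BW has to widen through dword
; lanes instead (vpternlogd on zmm, then vpmovdw back down).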
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

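; Byte tests split the SSE2 and SSSE3 prefixes because SSSE3 can splat the
; two mask bytes with a single pshufb. x86 has no per-byte shift, so the
; final psrlw $7 operates on words and is followed by a pand that clears
; the bits shifted in from the neighbouring byte.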
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

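; AVX1 has no 256-bit integer operations, so the compares are done on two
; 128-bit halves: each half is compared against zero and the result
; inverted with an all-ones pxor, which for single-bit masks is equivalent
; to the direct equality compare AVX2 performs on the full ymm register.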
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

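; The 9241421688590303745 constant below is 0x8040201008040201, i.e. the
; byte pattern [1,2,4,8,16,32,64,128], which AVX2 broadcasts as a single
; qword instead of loading a full 32-byte constant. The AVX2-SLOW and
; AVX2-FAST prefixes diverge because +fast-variable-shuffle prefers pshufb
; over the pshuflw/pshufd chain when splatting the mask bytes.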
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

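; At 512 bits the result spans multiple registers on SSE/AVX targets
; (xmm0-xmm3 or ymm0/ymm1). AVX512F materializes all-ones lanes under the
; mask with a zeroing vpternlog (ternary function 255 = all ones) and then
; shifts each lane down to 1.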
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

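; <16 x i32> is the natural zmm element count for a 16-bit mask, so both
; AVX512 variants lower identically: a kmov, a masked all-ones vpternlogd,
; and a single vpsrld $31.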
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

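; Without AVX512BW a 32 x i16 result needs two halves: AVX2 shifts the
; scalar right by 16 and repeats the broadcast/compare sequence, and
; AVX512F splits the mask across k1/k2 before widening through dword lanes.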
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

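; AVX512F can only load mask registers 16 bits at a time (kmovw; kmovd and
; kmovq need AVX512BW), so the 64-bit mask is split into four 16-bit chunks
; that are each broadcast through a zmm register and truncated with
; vpmovdb. AVX512VLBW handles the whole mask with a single kmovq.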
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    movq %rdi, %rcx
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    movl %edi, %edx
; AVX512F-NEXT:    shrl $16, %edx
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    shrq $48, %rcx
; AVX512F-NEXT:    kmovw %ecx, %k2
; AVX512F-NEXT:    kmovw %eax, %k3
; AVX512F-NEXT:    kmovw %edx, %k4
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k4} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k3} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovq %rdi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}