; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512
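;
; Each function below bitcasts a small scalar integer mask to an <N x i1>
; vector and sign-extends it, so every set bit of the mask becomes an
; all-ones element and every clear bit becomes a zero element.
;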

;
; 128-bit vectors
;
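; Without AVX-512 mask registers the mask is splatted across the vector,
; ANDed with a per-element power-of-two constant, and compared for equality
; against that same constant, so matching elements come back as all-ones.
; SSE2 has no 64-bit element compare, so ext_i2_2i64 emulates one with
; pcmpeqd plus a shuffle and pand. AVX-512 moves the bits into a k-register
; instead and expands them with vpmovm2* or a zero-masked move of all-ones.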

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;
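; AVX1 has no 256-bit integer compares, so the 256-bit cases are split into
; 128-bit halves; each half is compared against zero and the result is
; inverted with an all-ones XOR before being reassembled with vinsertf128.
; AVX2 keeps the whole vector in one ymm register, and AVX-512VL uses a
; zero-masked move or vpmovm2* directly.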

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovd %edi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovd %edi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;
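; Only AVX-512 can hold these results in a single zmm register (via vpmovm2*
; or a masked vpternlog that writes all-ones); earlier targets return the
; value split across two ymm or four xmm registers, repeating the
; broadcast/and/compare sequence for each piece of the mask.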

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: ext_i64_64i8:
; AVX2-SLOW:       # %bb.0:
; AVX2-SLOW-NEXT:    vmovq %rdi, %xmm0
; AVX2-SLOW-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-SLOW-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-SLOW-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-LABEL: ext_i64_64i8:
; AVX2-FAST:       # %bb.0:
; AVX2-FAST-NEXT:    vmovq %rdi, %xmm0
; AVX2-FAST-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
; AVX2-FAST-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-FAST-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-FAST-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}