; (code-browser navigation header, not part of the test) Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256 --check-prefix=AVX256VL
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512VL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256 --check-prefix=AVX256VLBW
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VLBW
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512F
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512NOBW --check-prefix=AVX512F
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
     10 
     11 define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8 x i32>* %b) {
     12 ; AVX256VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
     13 ; AVX256VL:       # %bb.0:
     14 ; AVX256VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     15 ; AVX256VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     16 ; AVX256VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
     17 ; AVX256VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
     18 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
     19 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
     20 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} {z}
     21 ; AVX256VL-NEXT:    vpmovdw %ymm2, %xmm2
     22 ; AVX256VL-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2],xmm2[3],xmm1[4],xmm2[5,6,7]
     23 ; AVX256VL-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[6,7,12,13,4,5,8,9,6,7,14,15,14,15,0,1]
     24 ; AVX256VL-NEXT:    vpmovsxwd %xmm3, %ymm3
     25 ; AVX256VL-NEXT:    vpslld $31, %ymm3, %ymm3
     26 ; AVX256VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
     27 ; AVX256VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
     28 ; AVX256VL-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[6,7,12,13,2,3,14,15,6,7,6,7,14,15,0,1]
     29 ; AVX256VL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4],xmm1[5],xmm2[6,7]
     30 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm1
     31 ; AVX256VL-NEXT:    vpslld $31, %ymm1, %ymm1
     32 ; AVX256VL-NEXT:    vptestmd %ymm1, %ymm1, %k0
     33 ; AVX256VL-NEXT:    kunpckbw %k1, %k0, %k0
     34 ; AVX256VL-NEXT:    kshiftrw $8, %k0, %k2
     35 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
     36 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
     37 ; AVX256VL-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
     38 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
     39 ; AVX256VL-NEXT:    vpmovdw %ymm0, %xmm0
     40 ; AVX256VL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
     41 ; AVX256VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     42 ; AVX256VL-NEXT:    vzeroupper
     43 ; AVX256VL-NEXT:    retq
     44 ;
     45 ; AVX512VL-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
     46 ; AVX512VL:       # %bb.0:
     47 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     48 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     49 ; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
     50 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
     51 ; AVX512VL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
     52 ; AVX512VL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
     53 ; AVX512VL-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
     54 ; AVX512VL-NEXT:    vptestmd %zmm2, %zmm2, %k1
     55 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
     56 ; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
     57 ; AVX512VL-NEXT:    vzeroupper
     58 ; AVX512VL-NEXT:    retq
     59 ;
     60 ; AVX256VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
     61 ; AVX256VLBW:       # %bb.0:
     62 ; AVX256VLBW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     63 ; AVX256VLBW-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
     64 ; AVX256VLBW-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
     65 ; AVX256VLBW-NEXT:    vpmovm2w %k1, %ymm0
     66 ; AVX256VLBW-NEXT:    vpmovm2w %k0, %ymm1
     67 ; AVX256VLBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
     68 ; AVX256VLBW-NEXT:    vpermi2w %ymm0, %ymm1, %ymm2
     69 ; AVX256VLBW-NEXT:    vpmovw2m %ymm2, %k0
     70 ; AVX256VLBW-NEXT:    vpmovm2b %k0, %xmm0
     71 ; AVX256VLBW-NEXT:    vzeroupper
     72 ; AVX256VLBW-NEXT:    retq
     73 ;
     74 ; AVX512VLBW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
     75 ; AVX512VLBW:       # %bb.0:
     76 ; AVX512VLBW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     77 ; AVX512VLBW-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     78 ; AVX512VLBW-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
     79 ; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
     80 ; AVX512VLBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
     81 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
     82 ; AVX512VLBW-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
     83 ; AVX512VLBW-NEXT:    vptestmd %zmm2, %zmm2, %k0
     84 ; AVX512VLBW-NEXT:    vpmovm2b %k0, %xmm0
     85 ; AVX512VLBW-NEXT:    vzeroupper
     86 ; AVX512VLBW-NEXT:    retq
     87 ;
     88 ; AVX512F-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
     89 ; AVX512F:       # %bb.0:
     90 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
     91 ; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
     92 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
     93 ; AVX512F-NEXT:    vptestnmd %zmm1, %zmm1, %k2
     94 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
     95 ; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
     96 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
     97 ; AVX512F-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
     98 ; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
     99 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    100 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
    101 ; AVX512F-NEXT:    vzeroupper
    102 ; AVX512F-NEXT:    retq
    103 ;
    104 ; AVX512BW-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    105 ; AVX512BW:       # %bb.0:
    106 ; AVX512BW-NEXT:    vmovdqa (%rdi), %ymm0
    107 ; AVX512BW-NEXT:    vmovdqa (%rsi), %ymm1
    108 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    109 ; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k2
    110 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
    111 ; AVX512BW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
    112 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,18,20,3,7,7,0,3,6,1,21,3,19,7,0]
    113 ; AVX512BW-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
    114 ; AVX512BW-NEXT:    vptestmd %zmm2, %zmm2, %k0
    115 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    116 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
    117 ; AVX512BW-NEXT:    vzeroupper
    118 ; AVX512BW-NEXT:    retq
    119 
    120   %a1 = load <8 x i32>, <8 x i32>* %a
    121   %b1 = load <8 x i32>, <8 x i32>* %b
    122   %a2 = icmp eq <8 x i32> %a1, zeroinitializer
    123   %b2 = icmp eq <8 x i32> %b1, zeroinitializer
    124   %c = shufflevector <8 x i1> %a2, <8 x i1> %b2, <16 x i32> <i32 3, i32 6, i32 10, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 11, i32 7, i32 0>
    125   ret <16 x i1> %c
    126 }
    127 
; Single-input v32i1 shuffle: a <32 x i8> zero-compare mask is permuted by a
; 32-lane shufflevector whose second operand is undef; the 16-lane index
; pattern (3,6,22,12,...) is repeated across both halves, so cross-128-bit
; lane moves (indices 21/22) are required.  The bodies below are
; autogenerated FileCheck assertions (note AVX512VL/AVX512F share the
; AVX512NOBW prefix here) and must match llc output exactly -- regenerate
; with utils/update_llc_test_checks.py rather than editing by hand.
    128 define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i8> %a) {
    129 ; AVX256VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    130 ; AVX256VL:       # %bb.0:
    131 ; AVX256VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    132 ; AVX256VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
    133 ; AVX256VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    134 ; AVX256VL-NEXT:    vpmovsxbw %xmm1, %xmm1
    135 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm1
    136 ; AVX256VL-NEXT:    vptestmd %ymm1, %ymm1, %k1
    137 ; AVX256VL-NEXT:    vpmovsxbw %xmm0, %ymm0
    138 ; AVX256VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
    139 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm1
    140 ; AVX256VL-NEXT:    vptestmd %ymm1, %ymm1, %k2
    141 ; AVX256VL-NEXT:    vpmovsxwd %xmm0, %ymm0
    142 ; AVX256VL-NEXT:    vptestmd %ymm0, %ymm0, %k3
    143 ; AVX256VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
    144 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k3} {z}
    145 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
    146 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k2} {z}
    147 ; AVX256VL-NEXT:    vpmovdw %ymm2, %xmm2
    148 ; AVX256VL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    149 ; AVX256VL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
    150 ; AVX256VL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3],ymm2[4,5],ymm1[6],ymm2[7]
    151 ; AVX256VL-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
    152 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} {z}
    153 ; AVX256VL-NEXT:    vpmovdw %ymm2, %xmm2
    154 ; AVX256VL-NEXT:    kshiftrw $8, %k1, %k1
    155 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm3 {%k1} {z}
    156 ; AVX256VL-NEXT:    vpmovdw %ymm3, %xmm3
    157 ; AVX256VL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
    158 ; AVX256VL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
    159 ; AVX256VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
    160 ; AVX256VL-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
    161 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm2
    162 ; AVX256VL-NEXT:    vpslld $31, %ymm2, %ymm2
    163 ; AVX256VL-NEXT:    vptestmd %ymm2, %ymm2, %k1
    164 ; AVX256VL-NEXT:    vextracti128 $1, %ymm1, %xmm1
    165 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm1
    166 ; AVX256VL-NEXT:    vpslld $31, %ymm1, %ymm1
    167 ; AVX256VL-NEXT:    vptestmd %ymm1, %ymm1, %k0
    168 ; AVX256VL-NEXT:    kunpckbw %k1, %k0, %k0
    169 ; AVX256VL-NEXT:    kshiftrw $8, %k0, %k2
    170 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
    171 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
    172 ; AVX256VL-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
    173 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    174 ; AVX256VL-NEXT:    vpmovdw %ymm0, %xmm0
    175 ; AVX256VL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
    176 ; AVX256VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    177 ; AVX256VL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    178 ; AVX256VL-NEXT:    retq
    179 ;
    180 ; AVX512NOBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    181 ; AVX512NOBW:       # %bb.0:
    182 ; AVX512NOBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    183 ; AVX512NOBW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
    184 ; AVX512NOBW-NEXT:    vpmovsxbd %xmm0, %zmm1
    185 ; AVX512NOBW-NEXT:    vptestmd %zmm1, %zmm1, %k1
    186 ; AVX512NOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
    187 ; AVX512NOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
    188 ; AVX512NOBW-NEXT:    vptestmd %zmm0, %zmm0, %k2
    189 ; AVX512NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
    190 ; AVX512NOBW-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
    191 ; AVX512NOBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
    192 ; AVX512NOBW-NEXT:    vpermi2d %zmm0, %zmm1, %zmm2
    193 ; AVX512NOBW-NEXT:    vptestmd %zmm2, %zmm2, %k1
    194 ; AVX512NOBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    195 ; AVX512NOBW-NEXT:    vpmovdb %zmm0, %xmm0
    196 ; AVX512NOBW-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    197 ; AVX512NOBW-NEXT:    retq
    198 ;
    199 ; AVX256VLBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    200 ; AVX256VLBW:       # %bb.0:
    201 ; AVX256VLBW-NEXT:    vptestnmb %ymm0, %ymm0, %k0
    202 ; AVX256VLBW-NEXT:    vpmovm2b %k0, %ymm0
    203 ; AVX256VLBW-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
    204 ; AVX256VLBW-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
    205 ; AVX256VLBW-NEXT:    movl $-537190396, %eax # imm = 0xDFFB2004
    206 ; AVX256VLBW-NEXT:    kmovd %eax, %k1
    207 ; AVX256VLBW-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
    208 ; AVX256VLBW-NEXT:    vpmovb2m %ymm0, %k0
    209 ; AVX256VLBW-NEXT:    vpmovm2b %k0, %ymm0
    210 ; AVX256VLBW-NEXT:    retq
    211 ;
    212 ; AVX512VLBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    213 ; AVX512VLBW:       # %bb.0:
    214 ; AVX512VLBW-NEXT:    vptestnmb %ymm0, %ymm0, %k0
    215 ; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
    216 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
    217 ; AVX512VLBW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
    218 ; AVX512VLBW-NEXT:    vpmovw2m %zmm0, %k0
    219 ; AVX512VLBW-NEXT:    vpmovm2b %k0, %ymm0
    220 ; AVX512VLBW-NEXT:    retq
    221 ;
    222 ; AVX512BW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
    223 ; AVX512BW:       # %bb.0:
    224 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    225 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
    226 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
    227 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
    228 ; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
    229 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
    230 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    231 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    232 ; AVX512BW-NEXT:    retq
    233   %cmp = icmp eq <32 x i8> %a, zeroinitializer
    234   %b = shufflevector <32 x i1> %cmp, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
    235   ret <32 x i1> %b
    236 }
    237 
    238