Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-NOBW
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-BW
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=AVX512 --check-prefix=BITALG
      7 
      8 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
        ; Expected lowering of a per-lane i64 population count on a 512-bit vector.
        ;  - With +avx512f alone the input is split into two ymm halves. Each half is
        ;    run through a vpshufb nibble lookup (low and high nibble, mask 15), the
        ;    two lookups are added with vpaddb, and the bytes are reduced with vpsadbw
        ;    against a zeroed register. The halves are rejoined with vinserti64x4.
        ;  - With +avx512bw or +avx512bitalg the same lookup and vpsadbw sequence is
        ;    emitted once on the full zmm register.
        ;  - With +avx512vpopcntdq a single vpopcntq is emitted.
      9 ; AVX512F-LABEL: testv8i64:
     10 ; AVX512F:       # %bb.0:
     11 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
     12 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     13 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
     14 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     15 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
     16 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
     17 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
     18 ; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
     19 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
     20 ; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
     21 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
     22 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm5
     23 ; AVX512F-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
     24 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
     25 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
     26 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
     27 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
     28 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
     29 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
     30 ; AVX512F-NEXT:    retq
     31 ;
     32 ; AVX512BW-LABEL: testv8i64:
     33 ; AVX512BW:       # %bb.0:
     34 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     35 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
     36 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     37 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
     38 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
     39 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
     40 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
     41 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
     42 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     43 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
     44 ; AVX512BW-NEXT:    retq
     45 ;
     46 ; AVX512VPOPCNTDQ-LABEL: testv8i64:
     47 ; AVX512VPOPCNTDQ:       # %bb.0:
     48 ; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
     49 ; AVX512VPOPCNTDQ-NEXT:    retq
     50 ;
     51 ; BITALG-LABEL: testv8i64:
     52 ; BITALG:       # %bb.0:
     53 ; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     54 ; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm2
     55 ; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     56 ; BITALG-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
     57 ; BITALG-NEXT:    vpsrlw $4, %zmm0, %zmm0
     58 ; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm0
     59 ; BITALG-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
     60 ; BITALG-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
     61 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     62 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
     63 ; BITALG-NEXT:    retq
        ; IR under test: population count of each i64 lane of %in.
     64   %out = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %in)
     65   ret <8 x i64> %out
     66 }
     67 
     68 define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
        ; Expected lowering of a per-lane i32 population count on a 512-bit vector.
        ;  - With +avx512f alone the input is split into two ymm halves. Each half
        ;    gets the vpshufb nibble lookup (mask 15) summed with vpaddb. The byte
        ;    counts are then interleaved with a zeroed register by vpunpckhdq and
        ;    vpunpckldq, reduced with vpsadbw, and merged again with vpackuswb.
        ;    The halves are rejoined with vinserti64x4.
        ;  - With +avx512bw or +avx512bitalg the same sequence is emitted once on the
        ;    full zmm register.
        ;  - With +avx512vpopcntdq a single vpopcntd is emitted.
     69 ; AVX512F-LABEL: testv16i32:
     70 ; AVX512F:       # %bb.0:
     71 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
     72 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     73 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
     74 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     75 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
     76 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
     77 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
     78 ; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
     79 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
     80 ; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
     81 ; AVX512F-NEXT:    vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
     82 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm5, %ymm5
     83 ; AVX512F-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
     84 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
     85 ; AVX512F-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
     86 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm5
     87 ; AVX512F-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
     88 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
     89 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
     90 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
     91 ; AVX512F-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
     92 ; AVX512F-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
     93 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm2, %ymm2
     94 ; AVX512F-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
     95 ; AVX512F-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
     96 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
     97 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
     98 ; AVX512F-NEXT:    retq
     99 ;
    100 ; AVX512BW-LABEL: testv16i32:
    101 ; AVX512BW:       # %bb.0:
    102 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    103 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    104 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    105 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    106 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    107 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    108 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    109 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    110 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    111 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
    112 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
    113 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
    114 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
    115 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    116 ; AVX512BW-NEXT:    retq
    117 ;
    118 ; AVX512VPOPCNTDQ-LABEL: testv16i32:
    119 ; AVX512VPOPCNTDQ:       # %bb.0:
    120 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
    121 ; AVX512VPOPCNTDQ-NEXT:    retq
    122 ;
    123 ; BITALG-LABEL: testv16i32:
    124 ; BITALG:       # %bb.0:
    125 ; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    126 ; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    127 ; BITALG-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    128 ; BITALG-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    129 ; BITALG-NEXT:    vpsrlw $4, %zmm0, %zmm0
    130 ; BITALG-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    131 ; BITALG-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    132 ; BITALG-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    133 ; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    134 ; BITALG-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
    135 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
    136 ; BITALG-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
    137 ; BITALG-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
    138 ; BITALG-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    139 ; BITALG-NEXT:    retq
        ; IR under test: population count of each i32 lane of %in.
    140   %out = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %in)
    141   ret <16 x i32> %out
    142 }
    143 
    144 define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
        ; Expected lowering of a per-lane i16 population count on a 512-bit vector.
        ;  - With +avx512f alone the checks operate on ymm0 and ymm1 separately. Each
        ;    gets the vpshufb nibble lookup (mask 15) summed with vpaddb, followed by
        ;    vpsllw $8, vpaddb and vpsrlw $8 to combine the two byte counts of every
        ;    16-bit lane.
        ;  - With +avx512bw (with or without +avx512vpopcntdq) the same sequence is
        ;    emitted once on the full zmm register.
        ;  - With +avx512vpopcntdq but no +avx512bw, each of ymm0 and ymm1 is widened
        ;    with vpmovzxwd, counted with vpopcntd and narrowed back with vpmovdw.
        ;  - With +avx512bitalg a single vpopcntw is emitted.
    145 ; AVX512F-LABEL: testv32i16:
    146 ; AVX512F:       # %bb.0:
    147 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    148 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
    149 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    150 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    151 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
    152 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
    153 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    154 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
    155 ; AVX512F-NEXT:    vpsllw $8, %ymm0, %ymm3
    156 ; AVX512F-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
    157 ; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
    158 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
    159 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    160 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
    161 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
    162 ; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    163 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    164 ; AVX512F-NEXT:    vpsllw $8, %ymm1, %ymm2
    165 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    166 ; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
    167 ; AVX512F-NEXT:    retq
    168 ;
    169 ; AVX512BW-LABEL: testv32i16:
    170 ; AVX512BW:       # %bb.0:
    171 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    172 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    173 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    174 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    175 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    176 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    177 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    178 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    179 ; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
    180 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
    181 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    182 ; AVX512BW-NEXT:    retq
    183 ;
    184 ; AVX512VPOPCNTDQ-NOBW-LABEL: testv32i16:
    185 ; AVX512VPOPCNTDQ-NOBW:       # %bb.0:
    186 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    187 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpopcntd %zmm0, %zmm0
    188 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovdw %zmm0, %ymm0
    189 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
    190 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpopcntd %zmm1, %zmm1
    191 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpmovdw %zmm1, %ymm1
    192 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
    193 ;
    194 ; AVX512VPOPCNTDQ-BW-LABEL: testv32i16:
    195 ; AVX512VPOPCNTDQ-BW:       # %bb.0:
    196 ; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    197 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    198 ; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    199 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    200 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    201 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    202 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    203 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    204 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsllw $8, %zmm0, %zmm1
    205 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
    206 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    207 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
    208 ;
    209 ; BITALG-LABEL: testv32i16:
    210 ; BITALG:       # %bb.0:
    211 ; BITALG-NEXT:    vpopcntw %zmm0, %zmm0
    212 ; BITALG-NEXT:    retq
        ; IR under test: population count of each i16 lane of %in.
    213   %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
    214   ret <32 x i16> %out
    215 }
    216 
    217 define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
        ; Expected lowering of a per-lane i8 population count on a 512-bit vector.
        ;  - With +avx512f alone, and with +avx512vpopcntdq but no +avx512bw, the
        ;    checks operate on ymm0 and ymm1 separately. Each gets the vpshufb nibble
        ;    lookup (low and high nibble, mask 15) summed with vpaddb.
        ;  - With +avx512bw (with or without +avx512vpopcntdq) the same lookup is
        ;    emitted once on the full zmm register.
        ;  - With +avx512bitalg a single vpopcntb is emitted.
    218 ; AVX512F-LABEL: testv64i8:
    219 ; AVX512F:       # %bb.0:
    220 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    221 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
    222 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    223 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    224 ; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
    225 ; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
    226 ; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    227 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
    228 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
    229 ; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    230 ; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
    231 ; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
    232 ; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    233 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    234 ; AVX512F-NEXT:    retq
    235 ;
    236 ; AVX512BW-LABEL: testv64i8:
    237 ; AVX512BW:       # %bb.0:
    238 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    239 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    240 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    241 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    242 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    243 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    244 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    245 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    246 ; AVX512BW-NEXT:    retq
    247 ;
    248 ; AVX512VPOPCNTDQ-NOBW-LABEL: testv64i8:
    249 ; AVX512VPOPCNTDQ-NOBW:       # %bb.0:
    250 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    251 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm0, %ymm3
    252 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    253 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    254 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpsrlw $4, %ymm0, %ymm0
    255 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm0, %ymm0
    256 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    257 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
    258 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm1, %ymm3
    259 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    260 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpsrlw $4, %ymm1, %ymm1
    261 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpand %ymm2, %ymm1, %ymm1
    262 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    263 ; AVX512VPOPCNTDQ-NOBW-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    264 ; AVX512VPOPCNTDQ-NOBW-NEXT:    retq
    265 ;
    266 ; AVX512VPOPCNTDQ-BW-LABEL: testv64i8:
    267 ; AVX512VPOPCNTDQ-BW:       # %bb.0:
    268 ; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    269 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    270 ; AVX512VPOPCNTDQ-BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    271 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    272 ; AVX512VPOPCNTDQ-BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    273 ; AVX512VPOPCNTDQ-BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    274 ; AVX512VPOPCNTDQ-BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    275 ; AVX512VPOPCNTDQ-BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    276 ; AVX512VPOPCNTDQ-BW-NEXT:    retq
    277 ;
    278 ; BITALG-LABEL: testv64i8:
    279 ; BITALG:       # %bb.0:
    280 ; BITALG-NEXT:    vpopcntb %zmm0, %zmm0
    281 ; BITALG-NEXT:    retq
        ; IR under test: population count of each i8 lane of %in.
    282   %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
    283   ret <64 x i8> %out
    284 }
    285 
    286 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) ; ctpop overload called by @testv8i64
    287 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) ; ctpop overload called by @testv16i32
    288 declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>) ; ctpop overload called by @testv32i16
    289 declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>) ; ctpop overload called by @testv64i8
    290