; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD

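; AVX512CD supplies conflict-detection and leading-zero-count instructions but
; no vector popcount, so every ctpop below must be expanded. For <8 x i64> the
; expected lowering extracts each 64-bit element to a GPR, counts it with
; scalar POPCNT, and reassembles the result vector from 128-bit pieces.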
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; ALL-NEXT:    vpextrq $1, %xmm1, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm2
; ALL-NEXT:    vmovq %xmm1, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm1
; ALL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; ALL-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; ALL-NEXT:    vpextrq $1, %xmm2, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm3
; ALL-NEXT:    vmovq %xmm2, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm2
; ALL-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; ALL-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
; ALL-NEXT:    vpextrq $1, %xmm2, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm3
; ALL-NEXT:    vmovq %xmm2, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm2
; ALL-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; ALL-NEXT:    vpextrq $1, %xmm0, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm3
; ALL-NEXT:    vmovq %xmm0, %rax
; ALL-NEXT:    popcntq %rax, %rax
; ALL-NEXT:    vmovq %rax, %xmm0
; ALL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %in)
  ret <8 x i64> %out
}

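; The <16 x i32> case uses the same per-element strategy: each of the 16 lanes
; is extracted, counted with scalar POPCNT, and reinserted with VPINSRD before
; the 128-bit pieces are recombined into a zmm register.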
define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; ALL-NEXT:    vpextrd $1, %xmm1, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vmovd %xmm1, %ecx
; ALL-NEXT:    popcntl %ecx, %ecx
; ALL-NEXT:    vmovd %ecx, %xmm2
; ALL-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; ALL-NEXT:    vpextrd $2, %xmm1, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; ALL-NEXT:    vpextrd $3, %xmm1, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; ALL-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; ALL-NEXT:    vpextrd $1, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vmovd %xmm2, %ecx
; ALL-NEXT:    popcntl %ecx, %ecx
; ALL-NEXT:    vmovd %ecx, %xmm3
; ALL-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $2, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $3, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; ALL-NEXT:    vextracti32x4 $1, %zmm0, %xmm2
; ALL-NEXT:    vpextrd $1, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vmovd %xmm2, %ecx
; ALL-NEXT:    popcntl %ecx, %ecx
; ALL-NEXT:    vmovd %ecx, %xmm3
; ALL-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $2, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $3, %xmm2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm2
; ALL-NEXT:    vpextrd $1, %xmm0, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vmovd %xmm0, %ecx
; ALL-NEXT:    popcntl %ecx, %ecx
; ALL-NEXT:    vmovd %ecx, %xmm3
; ALL-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $2, %xmm0, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
; ALL-NEXT:    vpextrd $3, %xmm0, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %in)
  ret <16 x i32> %out
}

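; For i16 elements the expected lowering switches to the in-register nibble
; LUT technique: mask each nibble with 15, look up its bit count via VPSHUFB
; against the [0,1,1,2,...] table, add the low- and high-nibble counts per
; byte, then combine the two byte counts within each 16-bit lane with a
; shift-left-8 / add / shift-right-8 sequence. KNL lacks AVX512BW, so the
; vector is processed as two 256-bit halves in ymm0 and ymm1.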
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm3
; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; ALL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; ALL-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
; ALL-NEXT:    vpsllw $8, %ymm0, %ymm3
; ALL-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
; ALL-NEXT:    vpsrlw $8, %ymm0, %ymm0
; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm3
; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; ALL-NEXT:    vpsrlw $4, %ymm1, %ymm1
; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; ALL-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; ALL-NEXT:    vpsllw $8, %ymm1, %ymm2
; ALL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; ALL-NEXT:    vpsrlw $8, %ymm1, %ymm1
; ALL-NEXT:    retq
  %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
  ret <32 x i16> %out
}

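; The <64 x i8> case uses the same nibble LUT but needs no horizontal merge:
; the per-nibble counts are simply added byte-wise, again as two 256-bit
; halves.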
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm3
; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; ALL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
; ALL-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm3
; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
; ALL-NEXT:    vpsrlw $4, %ymm1, %ymm1
; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm1
; ALL-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
; ALL-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
; ALL-NEXT:    retq
  %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
  ret <64 x i8> %out
}

declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)