; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
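
; testv4i64 - without a native vector popcount, ctpop is expanded with the
; in-register nibble LUT: mask out each 4-bit half, look up its popcount via
; vpshufb against [0,1,1,2,...,4], vpaddb the two halves, then vpsadbw
; against zero to sum the byte counts into each 64-bit lane.
; AVX512VPOPCNTDQ targets select vpopcntq instead (bouncing through zmm when
; +avx512vl is unavailable).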
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv4i64:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; BITALG_NOVLX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; BITALG_NOVLX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: testv4i64:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT:    vpand %ymm1, %ymm0, %ymm2
; BITALG-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; BITALG-NEXT:    vpsrlw $4, %ymm0, %ymm0
; BITALG-NEXT:    vpand %ymm1, %ymm0, %ymm0
; BITALG-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; BITALG-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT:    retq
  %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
  ret <4 x i64> %out
}
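
; testv8i32 - the same byte LUT, but vpsadbw only produces per-qword sums, so
; the byte counts are interleaved with zero (vpunpckhdq/vpunpckldq, or
; vpmovzxdq on AVX1), summed per half with vpsadbw, and repacked with
; vpackuswb to give per-dword totals. AVX512VPOPCNTDQ selects vpopcntd;
; BITALG has no dword popcount, so it falls back to the LUT sequence as well.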
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; AVX1-NEXT:    vpsadbw %xmm3, %xmm5, %xmm5
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; AVX1-NEXT:    vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT:    vpsadbw %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv8i32:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG_NOVLX-NEXT:    vpand %ymm1, %ymm0, %ymm2
; BITALG_NOVLX-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG_NOVLX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; BITALG_NOVLX-NEXT:    vpsrlw $4, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpand %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; BITALG_NOVLX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT:    vpsadbw %ymm1, %ymm2, %ymm2
; BITALG_NOVLX-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG_NOVLX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: testv8i32:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; BITALG-NEXT:    vpand %ymm1, %ymm0, %ymm2
; BITALG-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; BITALG-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; BITALG-NEXT:    vpsrlw $4, %ymm0, %ymm0
; BITALG-NEXT:    vpand %ymm1, %ymm0, %ymm0
; BITALG-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; BITALG-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; BITALG-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT:    vpsadbw %ymm1, %ymm2, %ymm2
; BITALG-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; BITALG-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; BITALG-NEXT:    retq
  %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
  ret <8 x i32> %out
}
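
; testv16i16 - the per-byte LUT counts are merged into words without vpsadbw:
; vpsllw $8 + vpaddb adds each low byte's count into the high byte, and
; vpsrlw $8 moves the per-word total back down. AVX512VPOPCNTDQ zero-extends
; to <16 x i32> for vpopcntd and truncates back with vpmovdw; BITALG uses the
; native vpopcntw.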
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm4
; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm4
; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpsllw $8, %xmm2, %xmm4
; AVX1-NEXT:    vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm4
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $8, %ymm0, %ymm1
; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv16i16:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i16:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQVL-NEXT:    vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: testv16i16:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntw %ymm0, %ymm0
; BITALG-NEXT:    retq
  %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
  ret <16 x i16> %out
}
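
; testv32i8 - for byte elements the nibble LUT already yields the final
; per-byte counts, so no horizontal summation step is needed.
; AVX512VPOPCNTDQ has no byte form either, so only BITALG lowers to a native
; vpopcntb.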
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: testv32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VPOPCNTDQ-LABEL: testv32i8:
; AVX512VPOPCNTDQ:       # %bb.0:
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT:    retq
;
; AVX512VPOPCNTDQVL-LABEL: testv32i8:
; AVX512VPOPCNTDQVL:       # %bb.0:
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQVL-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQVL-NEXT:    vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT:    retq
;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX:       # %bb.0:
; BITALG_NOVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; BITALG_NOVLX-NEXT:    vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; BITALG_NOVLX-NEXT:    retq
;
; BITALG-LABEL: testv32i8:
; BITALG:       # %bb.0:
; BITALG-NEXT:    vpopcntb %ymm0, %ymm0
; BITALG-NEXT:    retq
  %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
  ret <32 x i8> %out
}
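
; The fold* tests check that ctpop of constant vectors is constant-folded to
; a single load of the precomputed popcounts. Out-of-range literals such as
; i8 256 are truncated to the element width by the IR parser, which is why
; the first lane of foldv32i8 folds to 0.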
define <4 x i64> @foldv4i64() nounwind {
; ALL-LABEL: foldv4i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} ymm0 = [1,64,0,8]
; ALL-NEXT:    retq
  %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>)
  ret <4 x i64> %out
}

define <8 x i32> @foldv8i32() nounwind {
; ALL-LABEL: foldv8i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} ymm0 = [1,32,0,8,16,3,2,3]
; ALL-NEXT:    retq
  %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>)
  ret <8 x i32> %out
}

define <16 x i16> @foldv16i16() nounwind {
; ALL-LABEL: foldv16i16:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} ymm0 = [1,16,0,8,0,3,2,3,15,7,1,1,1,1,1,1]
; ALL-NEXT:    retq
  %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>)
  ret <16 x i16> %out
}

define <32 x i8> @foldv32i8() nounwind {
; ALL-LABEL: foldv32i8:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1,1,1,0,0,1,2,3,4,5,6,7,8,2,2,3,7]
; ALL-NEXT:    retq
  %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>)
  ret <32 x i8> %out
}

declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)